| @@ -0,0 +1,29 @@ | ||
| using System; | ||
|
|
||
| namespace Ryujinx.Cpu | ||
| { | ||
/// <summary>
/// A block of memory handed out for modification. When the region was created with a memory
/// manager, disposing it writes the current contents of <see cref="Memory"/> back through that
/// manager at the virtual address supplied at construction.
/// </summary>
public sealed class WritableRegion : IDisposable
{
    private readonly MemoryManager _mm;
    private readonly ulong _va;

    // Only regions that were given a memory manager have anything to write back.
    private bool NeedsWriteback => _mm != null;

    /// <summary>
    /// Buffer exposed to the caller; this is what <see cref="Dispose"/> writes back.
    /// </summary>
    public Memory<byte> Memory { get; }

    /// <summary>
    /// Creates a new writable region.
    /// </summary>
    /// <param name="mm">Memory manager used for the write-back, or null to disable write-back</param>
    /// <param name="va">Address passed to the memory manager on write-back</param>
    /// <param name="memory">Buffer exposed through <see cref="Memory"/></param>
    internal WritableRegion(MemoryManager mm, ulong va, Memory<byte> memory)
    {
        (_mm, _va, Memory) = (mm, va, memory);
    }

    /// <summary>
    /// Writes the region back through the memory manager, if one was supplied.
    /// </summary>
    public void Dispose()
    {
        if (!NeedsWriteback)
        {
            return;
        }

        _mm.Write(_va, Memory.Span);
    }
}
| } |
| @@ -0,0 +1,10 @@ | ||
| namespace Ryujinx.Graphics.Device | ||
| { | ||
/// <summary>
/// Access permissions of a device register.
/// The members are bit flags: <see cref="ReadWrite"/> is the combination of
/// <see cref="ReadOnly"/> and <see cref="WriteOnly"/>, and the values are tested with
/// <c>HasFlag</c>, so the type is marked with <see cref="System.FlagsAttribute"/>.
/// </summary>
[System.Flags]
public enum AccessControl
{
    /// <summary>No access.</summary>
    None = 0,

    /// <summary>Read access bit.</summary>
    ReadOnly = 1 << 0,

    /// <summary>Write access bit.</summary>
    WriteOnly = 1 << 1,

    /// <summary>Both the read and the write bit.</summary>
    ReadWrite = ReadOnly | WriteOnly
}
| } |
| @@ -0,0 +1,124 @@ | ||
| using System; | ||
| using System.Collections; | ||
| using System.Collections.Generic; | ||
| using System.Diagnostics; | ||
| using System.Linq; | ||
| using System.Reflection; | ||
| using System.Runtime.CompilerServices; | ||
|
|
||
| namespace Ryujinx.Graphics.Device | ||
| { | ||
/// <summary>
/// Register file backed by an unmanaged <typeparamref name="TState"/> structure.
/// Every 32-bit register carries a readable and a writable bit, taken from the
/// <see cref="RegisterAttribute"/> of the field that covers it, and may have a read and/or
/// write callback that replaces the direct access to <see cref="State"/>.
/// </summary>
/// <typeparam name="TState">Structure describing the register layout</typeparam>
public class DeviceState<TState> : IDeviceState where TState : unmanaged
{
    private const int RegisterSize = sizeof(int);

    /// <summary>
    /// Backing storage for the registers.
    /// </summary>
    public TState State;

    // One bit per 32-bit register.
    private readonly BitArray _readableRegisters;
    private readonly BitArray _writableRegisters;

    // Callbacks keyed by the byte offset of the field they were registered for.
    private readonly Dictionary<int, Func<int>> _readCallbacks;
    private readonly Dictionary<int, Action<int>> _writeCallbacks;

    /// <summary>
    /// Builds the access tables by walking the public fields of <typeparamref name="TState"/>.
    /// </summary>
    /// <param name="callbacks">
    /// Optional callbacks, keyed by field name of <typeparamref name="TState"/>.
    /// Fields without an entry are read from and written to <see cref="State"/> directly.
    /// </param>
    public DeviceState(IReadOnlyDictionary<string, RwCallback> callbacks = null)
    {
        // Number of 32-bit registers needed to cover the structure, rounded up.
        int size = (Unsafe.SizeOf<TState>() + RegisterSize - 1) / RegisterSize;

        _readableRegisters = new BitArray(size);
        _writableRegisters = new BitArray(size);

        _readCallbacks = new Dictionary<int, Func<int>>();
        _writeCallbacks = new Dictionary<int, Action<int>>();

        // NOTE(review): offsets are accumulated in the order GetFields returns the fields; the
        // reflection API does not document that order as declaration order, and the assert at
        // the end only verifies the total size — confirm this holds on the target runtime.
        var fields = typeof(TState).GetFields();
        int offset = 0;

        for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
        {
            var field = fields[fieldIndex];
            var regAttr = field.GetCustomAttributes<RegisterAttribute>(false).FirstOrDefault();

            int sizeOfField = SizeCalculator.SizeOf(field.FieldType);

            // Fields without a RegisterAttribute are both readable and writable.
            bool readable = regAttr?.AccessControl.HasFlag(AccessControl.ReadOnly) ?? true;
            bool writable = regAttr?.AccessControl.HasFlag(AccessControl.WriteOnly) ?? true;

            // Mark every 32-bit register the field touches (field size rounded up to 4 bytes).
            for (int i = 0; i < ((sizeOfField + 3) & ~3); i += 4)
            {
                _readableRegisters[(offset + i) / RegisterSize] = readable;
                _writableRegisters[(offset + i) / RegisterSize] = writable;
            }

            if (callbacks != null && callbacks.TryGetValue(field.Name, out var cb))
            {
                if (cb.Read != null)
                {
                    _readCallbacks.Add(offset, cb.Read);
                }

                if (cb.Write != null)
                {
                    _writeCallbacks.Add(offset, cb.Write);
                }
            }

            offset += sizeOfField;
        }

        Debug.Assert(offset == Unsafe.SizeOf<TState>());
    }

    /// <summary>
    /// Reads the 32-bit register containing <paramref name="offset"/>.
    /// </summary>
    /// <param name="offset">Byte offset into the state; rounded down to a multiple of 4</param>
    /// <returns>
    /// The read callback's result when one is registered for the register, otherwise the stored
    /// value; 0 when the offset is out of range or the register is not readable.
    /// </returns>
    public virtual int Read(int offset)
    {
        if (Check(offset) && _readableRegisters[offset / RegisterSize])
        {
            int alignedOffset = Align(offset);

            if (_readCallbacks.TryGetValue(alignedOffset, out Func<int> read))
            {
                return read();
            }
            else
            {
                return GetRef<int>(alignedOffset);
            }
        }

        return 0;
    }

    /// <summary>
    /// Writes the 32-bit register containing <paramref name="offset"/>.
    /// The write is ignored when the offset is out of range or the register is not writable.
    /// </summary>
    /// <param name="offset">Byte offset into the state; rounded down to a multiple of 4</param>
    /// <param name="data">Value handed to the write callback, or stored when there is none</param>
    public virtual void Write(int offset, int data)
    {
        if (Check(offset) && _writableRegisters[offset / RegisterSize])
        {
            int alignedOffset = Align(offset);

            if (_writeCallbacks.TryGetValue(alignedOffset, out Action<int> write))
            {
                write(data);
            }
            else
            {
                GetRef<int>(alignedOffset) = data;
            }
        }
    }

    // True when the aligned offset falls inside the state structure.
    // The cast to uint makes negative offsets fail the comparison.
    private bool Check(int offset)
    {
        return (uint)Align(offset) < Unsafe.SizeOf<TState>();
    }

    /// <summary>
    /// Gets a reference to a value of type <typeparamref name="T"/> stored at the given byte
    /// offset inside <see cref="State"/>.
    /// </summary>
    /// <typeparam name="T">Type of the value to reference</typeparam>
    /// <param name="offset">Byte offset into the state</param>
    /// <returns>Reference into <see cref="State"/></returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="offset"/> is negative, or the value would extend past the end of the state
    /// </exception>
    public ref T GetRef<T>(int offset) where T : unmanaged
    {
        // A negative offset must be rejected explicitly: "offset + sizeof(T)" can land back in
        // range (e.g. -4 + 4 == 0) and would otherwise yield a reference before the start of
        // State. The sum is done in 64 bits so that it cannot wrap for large offsets either.
        if (offset < 0 || (long)offset + Unsafe.SizeOf<T>() > Unsafe.SizeOf<TState>())
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }

        return ref Unsafe.As<TState, T>(ref Unsafe.AddByteOffset(ref State, (IntPtr)offset));
    }

    // Rounds the offset down to the start of its 32-bit register.
    private static int Align(int offset)
    {
        return offset & ~(RegisterSize - 1);
    }
}
| } |
| @@ -0,0 +1,8 @@ | ||
| namespace Ryujinx.Graphics.Device | ||
| { | ||
/// <summary>
/// Register-level access to a device, one 32-bit value at a time.
/// </summary>
public interface IDeviceState
{
    /// <summary>
    /// Reads the value at the given offset.
    /// </summary>
    /// <param name="offset">Offset of the register to read</param>
    /// <returns>Value read from the device</returns>
    int Read(int offset);

    /// <summary>
    /// Writes a value at the given offset.
    /// </summary>
    /// <param name="offset">Offset of the register to write</param>
    /// <param name="data">Value to write</param>
    void Write(int offset, int data);
}
| } |
| @@ -0,0 +1,15 @@ | ||
| using System; | ||
|
|
||
| namespace Ryujinx.Graphics.Device | ||
| { | ||
/// <summary>
/// Declares the access permissions of a register field.
/// </summary>
[AttributeUsage(AttributeTargets.Field, AllowMultiple = false)]
public sealed class RegisterAttribute : Attribute
{
    /// <summary>
    /// Access permissions given to the constructor.
    /// </summary>
    public AccessControl AccessControl { get; }

    /// <summary>
    /// Creates the attribute.
    /// </summary>
    /// <param name="ac">Access permissions of the annotated field</param>
    public RegisterAttribute(AccessControl ac) => AccessControl = ac;
}
| } |
| @@ -0,0 +1,16 @@ | ||
| using System; | ||
|
|
||
| namespace Ryujinx.Graphics.Device | ||
| { | ||
/// <summary>
/// Pair of optional callbacks for a register: one invoked on write, one invoked on read.
/// Either may be null. The struct is immutable, so it is declared <c>readonly</c>, which
/// also avoids defensive copies when it is accessed through readonly references.
/// </summary>
public readonly struct RwCallback
{
    /// <summary>
    /// Callback receiving the written value, or null.
    /// </summary>
    public Action<int> Write { get; }

    /// <summary>
    /// Callback producing the value to return from a read, or null.
    /// </summary>
    public Func<int> Read { get; }

    /// <summary>
    /// Creates a callback pair.
    /// </summary>
    /// <param name="write">Write callback, or null for none</param>
    /// <param name="read">Read callback, or null for none</param>
    public RwCallback(Action<int> write, Func<int> read)
    {
        Write = write;
        Read = read;
    }
}
| } |
| @@ -0,0 +1,7 @@ | ||
| <Project Sdk="Microsoft.NET.Sdk"> | ||
|
|
||
| <PropertyGroup> | ||
| <TargetFramework>netcoreapp3.1</TargetFramework> | ||
| </PropertyGroup> | ||
|
|
||
| </Project> |
| @@ -0,0 +1,63 @@ | ||
| using System; | ||
| using System.Reflection; | ||
|
|
||
| namespace Ryujinx.Graphics.Device | ||
| { | ||
/// <summary>
/// Computes the size in bytes of a type through reflection.
/// </summary>
static class SizeCalculator
{
    private const BindingFlags InstanceFields = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance;

    /// <summary>
    /// Gets the size in bytes of the given type.
    /// Enums use their underlying type, pointers and native integers are pointer-sized, structs
    /// use their explicit layout size when one is set and otherwise the plain sum of their
    /// instance field sizes (no padding is added), and the remaining types are looked up by
    /// type code.
    /// </summary>
    /// <param name="type">Type to measure</param>
    /// <returns>Size of the type in bytes</returns>
    /// <exception cref="ArgumentException">The type has no known size</exception>
    public static int SizeOf(Type type)
    {
        // Enums are measured as their underlying integer type.
        if (type.IsEnum)
        {
            type = type.GetEnumUnderlyingType();
        }

        bool pointerSized = type.IsPointer || type == typeof(IntPtr) || type == typeof(UIntPtr);

        if (pointerSized)
        {
            return IntPtr.Size;
        }

        // Anything that is not a non-primitive value type is resolved by type code.
        if (!type.IsValueType || type.IsPrimitive)
        {
            return SizeOfByTypeCode(type);
        }

        // Struct with an explicit size: trust it.
        int explicitSize = type.StructLayoutAttribute.Size;

        if (explicitSize != 0)
        {
            return explicitSize;
        }

        // Struct without an explicit size: add up the sizes of all instance fields.
        int total = 0;

        foreach (FieldInfo member in type.GetFields(InstanceFields))
        {
            total += SizeOf(member.FieldType);
        }

        return total;
    }

    // Size of the built-in types, selected by type code.
    private static int SizeOfByTypeCode(Type type)
    {
        switch (Type.GetTypeCode(type))
        {
            case TypeCode.SByte:
                return sizeof(sbyte);
            case TypeCode.Byte:
                return sizeof(byte);
            case TypeCode.Int16:
                return sizeof(short);
            case TypeCode.UInt16:
                return sizeof(ushort);
            case TypeCode.Int32:
                return sizeof(int);
            case TypeCode.UInt32:
                return sizeof(uint);
            case TypeCode.Int64:
                return sizeof(long);
            case TypeCode.UInt64:
                return sizeof(ulong);
            case TypeCode.Char:
                return sizeof(char);
            case TypeCode.Single:
                return sizeof(float);
            case TypeCode.Double:
                return sizeof(double);
            case TypeCode.Decimal:
                return sizeof(decimal);
            case TypeCode.Boolean:
                return sizeof(bool);
            default:
                throw new ArgumentException($"Length for type \"{type.Name}\" is unknown.");
        }
    }
}
| } |
| @@ -0,0 +1,20 @@ | ||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Class identifiers used to select a registered device, listed in ascending value order.
/// </summary>
public enum ClassId
{
    Host1x = 0x1,
    Mpeg   = 0x20,
    Nvenc  = 0x21,
    Vi     = 0x30,
    Isp    = 0x32,
    Ispb   = 0x34,
    Vii2c  = 0x36,
    Vic    = 0x5d,
    Gr3d   = 0x60,
    Gpu    = 0x61,
    Nvjpg  = 0xc0,
    Tsec   = 0xe0,
    Tsecb  = 0xe1,
    Nvdec  = 0xf0
}
| } |
| @@ -0,0 +1,32 @@ | ||
| using Ryujinx.Graphics.Device; | ||
| using System; | ||
| using System.Collections.Generic; | ||
|
|
||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Table of devices keyed by class ID.
/// </summary>
class Devices : IDisposable
{
    private readonly Dictionary<ClassId, IDeviceState> _devices = new Dictionary<ClassId, IDeviceState>();

    /// <summary>
    /// Registers a device for a class ID, replacing any device already registered for it.
    /// </summary>
    /// <param name="classId">Class ID the device answers to</param>
    /// <param name="device">Device to register</param>
    public void RegisterDevice(ClassId classId, IDeviceState device) => _devices[classId] = device;

    /// <summary>
    /// Looks up the device registered for a class ID.
    /// </summary>
    /// <param name="classId">Class ID to look up</param>
    /// <returns>The registered device, or null when there is none</returns>
    public IDeviceState GetDevice(ClassId classId)
    {
        // On a miss TryGetValue leaves the out variable at its default, which is null here.
        _devices.TryGetValue(classId, out IDeviceState found);

        return found;
    }

    /// <summary>
    /// Disposes every registered device that is a <see cref="ThiDevice"/>.
    /// </summary>
    public void Dispose()
    {
        foreach (IDeviceState entry in _devices.Values)
        {
            (entry as ThiDevice)?.Dispose();
        }
    }
}
| } |
| @@ -0,0 +1,33 @@ | ||
| using Ryujinx.Graphics.Device; | ||
| using Ryujinx.Graphics.Gpu.Synchronization; | ||
| using System.Collections.Generic; | ||
| using System.Threading; | ||
|
|
||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Device state for the host1x class registers. Writes to the WaitSyncpt32 register
/// block until the selected syncpoint reaches the threshold stored in LoadSyncptPayload32.
/// </summary>
public class Host1xClass : IDeviceState
{
    private readonly SynchronizationManager _syncMgr;
    private readonly DeviceState<Host1xClassRegisters> _state;

    /// <summary>
    /// Creates the host1x class state.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager used to wait on syncpoints</param>
    public Host1xClass(SynchronizationManager syncMgr)
    {
        _syncMgr = syncMgr;

        var callbacks = new Dictionary<string, RwCallback>
        {
            [nameof(Host1xClassRegisters.WaitSyncpt32)] = new RwCallback(WaitSyncpt32, null)
        };

        _state = new DeviceState<Host1xClassRegisters>(callbacks);
    }

    /// <inheritdoc/>
    public int Read(int offset)
    {
        return _state.Read(offset);
    }

    /// <inheritdoc/>
    public void Write(int offset, int data)
    {
        _state.Write(offset, data);
    }

    // Write callback for WaitSyncpt32: the low 8 bits of the written value select the
    // syncpoint, the threshold comes from the LoadSyncptPayload32 register. Waits without timeout.
    private void WaitSyncpt32(int data)
    {
        _syncMgr.WaitOnSyncpoint((uint)(data & 0xFF), _state.State.LoadSyncptPayload32, Timeout.InfiniteTimeSpan);
    }
}
| } |
| @@ -0,0 +1,41 @@ | ||
| using Ryujinx.Common.Memory; | ||
|
|
||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Register layout of the host1x class. The field order defines the layout and must not change.
/// </summary>
/// <remarks>
/// The byte offsets in the trailing comments assume sequential 4-byte fields with ArrayN
/// occupying N consecutive uints (TODO confirm against Ryujinx.Common.Memory); they agree with
/// the offsets embedded in the Reserved* field names.
/// </remarks>
internal struct Host1xClassRegisters
{
    public uint IncrSyncpt;             // 0x000
    public uint IncrSyncptCntrl;        // 0x004
    public uint IncrSyncptError;        // 0x008
    public Array5<uint> ReservedC;      // 0x00C
    public uint WaitSyncpt;             // 0x020
    public uint WaitSyncptBase;         // 0x024
    public uint WaitSyncptIncr;         // 0x028
    public uint LoadSyncptBase;         // 0x02C
    public uint IncrSyncptBase;         // 0x030
    public uint Clear;                  // 0x034
    public uint Wait;                   // 0x038
    public uint WaitWithIntr;           // 0x03C
    public uint DelayUsec;              // 0x040
    public uint TickcountHi;            // 0x044
    public uint TickcountLo;            // 0x048
    public uint Tickctrl;               // 0x04C
    public Array23<uint> Reserved50;    // 0x050
    public uint Indctrl;                // 0x0AC
    public uint Indoff2;                // 0x0B0
    public uint Indoff;                 // 0x0B4
    public Array31<uint> Inddata;       // 0x0B8
    public uint Reserved134;            // 0x134
    public uint LoadSyncptPayload32;    // 0x138
    public uint Stallctrl;              // 0x13C
    public uint WaitSyncpt32;           // 0x140
    public uint WaitSyncptBase32;       // 0x144
    public uint LoadSyncptBase32;       // 0x148
    public uint IncrSyncptBase32;       // 0x14C
    public uint StallcountHi;           // 0x150
    public uint StallcountLo;           // 0x154
    public uint Xrefctrl;               // 0x158
    public uint ChannelXrefHi;          // 0x15C
    public uint ChannelXrefLo;          // 0x160
}
| } |
| @@ -0,0 +1,123 @@ | ||
| using Ryujinx.Common; | ||
| using Ryujinx.Common.Logging; | ||
| using Ryujinx.Graphics.Device; | ||
| using Ryujinx.Graphics.Gpu.Synchronization; | ||
| using System; | ||
| using System.Numerics; | ||
|
|
||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Command processor: submitted command buffers are queued and decoded word by word on a
/// worker queue, and the resulting register writes are forwarded to the currently selected device.
/// </summary>
public sealed class Host1xDevice : IDisposable
{
    private readonly SyncptIncrManager _syncptIncrMgr;
    private readonly AsyncWorkQueue<int[]> _commandQueue;

    private readonly Devices _devices = new Devices();

    /// <summary>
    /// State of the host1x class itself, registered under <see cref="ClassId.Host1x"/>.
    /// </summary>
    public Host1xClass Class { get; }

    // Device selected by the last SetClass opcode; null until one is selected or when the
    // class has no registered device.
    private IDeviceState _device;

    // Decoder state carried from one command word to the next.
    private int _count;         // Data words still expected by an Incr/NonIncr opcode.
    private int _offset;        // Register offset (in words) the next data word targets.
    private int _mask;          // Pending register bits of a Mask/SetClass opcode.
    private bool _incrementing; // Whether _offset advances after each Incr/NonIncr data word.

    /// <summary>
    /// Creates the device and registers the host1x class.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager shared with the registered devices</param>
    public Host1xDevice(SynchronizationManager syncMgr)
    {
        _syncptIncrMgr = new SyncptIncrManager(syncMgr);
        _commandQueue = new AsyncWorkQueue<int[]>(Process, "Ryujinx.Host1xProcessor");

        Class = new Host1xClass(syncMgr);

        _devices.RegisterDevice(ClassId.Host1x, Class);
    }

    /// <summary>
    /// Registers a device for a class ID. The device is wrapped in a <see cref="ThiDevice"/>.
    /// </summary>
    /// <param name="classId">Class ID the device answers to</param>
    /// <param name="device">Device receiving the method calls</param>
    /// <exception cref="ArgumentNullException"><paramref name="device"/> is null</exception>
    public void RegisterDevice(ClassId classId, IDeviceState device)
    {
        if (device == null)
        {
            throw new ArgumentNullException(nameof(device));
        }

        _devices.RegisterDevice(classId, new ThiDevice(classId, device, _syncptIncrMgr));
    }

    /// <summary>
    /// Queues a command buffer for processing. The buffer is copied before being queued.
    /// </summary>
    /// <param name="commandBuffer">Command words to process</param>
    public void Submit(ReadOnlySpan<int> commandBuffer)
    {
        _commandQueue.Add(commandBuffer.ToArray());
    }

    // Queue callback: decodes one submitted buffer, word by word.
    private void Process(int[] commandBuffer)
    {
        foreach (int word in commandBuffer)
        {
            Step(word);
        }
    }

    // Consumes one command word: either a data word belonging to a previous opcode,
    // or a new opcode.
    private void Step(int value)
    {
        // Data word of a masked write: it targets the lowest register bit still set.
        if (_mask != 0)
        {
            int lbs = BitOperations.TrailingZeroCount(_mask);

            _mask &= ~(1 << lbs);

            DeviceWrite(_offset + lbs, value);

            return;
        }

        // Data word of an Incr/NonIncr run.
        if (_count != 0)
        {
            _count--;

            DeviceWrite(_offset, value);

            if (_incrementing)
            {
                _offset++;
            }

            return;
        }

        // Otherwise the word is an opcode: the top 4 bits select it, and every opcode handled
        // here carries its register offset in bits 16-27.
        OpCode opCode = (OpCode)((value >> 28) & 0xf);
        int offsetField = (value >> 16) & 0xfff;

        switch (opCode)
        {
            case OpCode.SetClass:
                _mask = value & 0x3f;
                _offset = offsetField;
                _device = _devices.GetDevice((ClassId)((value >> 6) & 0x3ff));
                break;

            case OpCode.Incr:
            case OpCode.NonIncr:
                _count = value & 0xffff;
                _offset = offsetField;
                _incrementing = opCode == OpCode.Incr;
                break;

            case OpCode.Mask:
                _mask = value & 0xffff;
                _offset = offsetField;
                break;

            case OpCode.Imm:
                // The immediate data is taken from the low 12 bits of the opcode word.
                _offset = offsetField;
                DeviceWrite(_offset, value & 0xfff);
                break;

            default:
                Logger.PrintError(LogClass.Host1x, $"Unsupported opcode \"{opCode}\".");
                break;
        }
    }

    // Forwards a write to the selected device, converting the word offset to a byte offset.
    // Writes are dropped while no device is selected.
    private void DeviceWrite(int offset, int data)
    {
        _device?.Write(offset * 4, data);
    }

    /// <summary>
    /// Disposes the command queue and then the registered devices.
    /// </summary>
    public void Dispose()
    {
        _commandQueue.Dispose();
        _devices.Dispose();
    }
}
| } |
| @@ -0,0 +1,21 @@ | ||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Opcodes of a command word. The values are sequential starting at 0 and are written out
/// explicitly so that the numeric encoding is visible.
/// </summary>
enum OpCode
{
    SetClass  = 0,
    Incr      = 1,
    NonIncr   = 2,
    Mask      = 3,
    Imm       = 4,
    Restart   = 5,
    Gather    = 6,
    SetStrmId = 7,
    SetAppId  = 8,
    SetPyld   = 9,
    IncrW     = 10,
    NonIncrW  = 11,
    GatherW   = 12,
    RestartW  = 13,
    Extend    = 14
}
| } |
| @@ -0,0 +1,20 @@ | ||
| <Project Sdk="Microsoft.NET.Sdk"> | ||
|
|
||
| <PropertyGroup> | ||
| <TargetFramework>netcoreapp3.1</TargetFramework> | ||
| </PropertyGroup> | ||
|
|
||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'"> | ||
| <AllowUnsafeBlocks>false</AllowUnsafeBlocks> | ||
| </PropertyGroup> | ||
|
|
||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'"> | ||
| <AllowUnsafeBlocks>false</AllowUnsafeBlocks> | ||
| </PropertyGroup> | ||
|
|
||
| <ItemGroup> | ||
| <ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" /> | ||
| <ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" /> | ||
| </ItemGroup> | ||
|
|
||
| </Project> |
| @@ -0,0 +1,99 @@ | ||
| using Ryujinx.Graphics.Gpu.Synchronization; | ||
| using System.Collections.Generic; | ||
|
|
||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Keeps the list of pending syncpoint increments and applies them strictly in the order they
/// were requested: an increment is only performed once it and every increment queued before
/// it are done.
/// </summary>
class SyncptIncrManager
{
    private readonly SynchronizationManager _syncMgr;

    // One queued increment. Immutable: marking an entry done replaces it in the list.
    private struct SyncptIncr
    {
        public uint Id { get; }
        public ClassId ClassId { get; }
        public uint SyncptId { get; }
        public bool Done { get; }

        public SyncptIncr(uint id, ClassId classId, uint syncptId, bool done = false)
        {
            Id = id;
            ClassId = classId;
            SyncptId = syncptId;
            Done = done;
        }
    }

    // Pending increments in request order. Also used as the lock object for all state here.
    private readonly List<SyncptIncr> _incrs = new List<SyncptIncr>();

    // Next handle returned by IncrementWhenDone.
    private uint _currentId;

    /// <summary>
    /// Creates the manager.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager whose syncpoints are incremented</param>
    public SyncptIncrManager(SynchronizationManager syncMgr)
    {
        _syncMgr = syncMgr;
    }

    /// <summary>
    /// Queues an increment that is already done. It is applied immediately unless an earlier
    /// increment is still pending, in which case it waits behind it.
    /// </summary>
    /// <param name="id">Syncpoint to increment</param>
    public void Increment(uint id)
    {
        lock (_incrs)
        {
            // Immediate increments carry no handle; they are queued with Id 0 and Done set.
            _incrs.Add(new SyncptIncr(0, 0, id, true));

            IncrementAllDone();
        }
    }

    /// <summary>
    /// Queues an increment that is applied once <see cref="SignalDone"/> is called with the
    /// returned handle.
    /// </summary>
    /// <param name="classId">Class requesting the increment</param>
    /// <param name="id">Syncpoint to increment</param>
    /// <returns>Handle to pass to <see cref="SignalDone"/></returns>
    public uint IncrementWhenDone(ClassId classId, uint id)
    {
        lock (_incrs)
        {
            uint handle = _currentId++;

            _incrs.Add(new SyncptIncr(handle, classId, id));

            return handle;
        }
    }

    /// <summary>
    /// Marks the pending increment with the given handle as done and applies every increment
    /// that has become ready.
    /// </summary>
    /// <param name="handle">Handle returned by <see cref="IncrementWhenDone"/></param>
    public void SignalDone(uint handle)
    {
        lock (_incrs)
        {
            // Set pending increment with the given handle to "done".
            for (int i = 0; i < _incrs.Count; i++)
            {
                SyncptIncr incr = _incrs[i];

                // Entries that are already done are skipped: immediate increments are queued
                // with Id 0, so without this check one of them sitting earlier in the list
                // could be matched in place of the pending entry that really owns the handle
                // (possible once the handle counter has wrapped), leaving that entry pending
                // forever.
                if (!incr.Done && incr.Id == handle)
                {
                    _incrs[i] = new SyncptIncr(incr.Id, incr.ClassId, incr.SyncptId, true);

                    break;
                }
            }

            IncrementAllDone();
        }
    }

    // Applies and removes the leading run of done increments. Callers already hold the lock;
    // taking it again is harmless because the monitor is reentrant.
    private void IncrementAllDone()
    {
        lock (_incrs)
        {
            // Increment all sequential pending increments that are already done.
            int doneCount = 0;

            for (; doneCount < _incrs.Count; doneCount++)
            {
                if (!_incrs[doneCount].Done)
                {
                    break;
                }

                _syncMgr.IncrementSyncpoint(_incrs[doneCount].SyncptId);
            }

            _incrs.RemoveRange(0, doneCount);
        }
    }
}
| } |
| @@ -0,0 +1,96 @@ | ||
| using Ryujinx.Common; | ||
| using Ryujinx.Graphics.Device; | ||
| using System; | ||
| using System.Collections.Generic; | ||
|
|
||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Wraps a device behind a <see cref="ThiRegisters"/> register file. Writes to Method1 and
/// "when done" syncpoint increments are queued and executed in order on a worker queue.
/// </summary>
class ThiDevice : IDeviceState, IDisposable
{
    private readonly ClassId _classId;
    private readonly IDeviceState _device;

    private readonly SyncptIncrManager _syncptIncrMgr;

    // Base type of the queued work items.
    private class CommandAction
    {
        public int Data { get; }

        public CommandAction(int data)
        {
            Data = data;
        }
    }

    // Queued write of Data to the wrapped device at byte offset Method.
    private class MethodCallAction : CommandAction
    {
        public int Method { get; }

        public MethodCallAction(int method, int data) : base(data)
        {
            Method = method;
        }
    }

    // Queued completion of a pending syncpoint increment; Data holds the handle.
    private class SyncptIncrAction : CommandAction
    {
        public SyncptIncrAction(uint syncptIncrHandle) : base((int)syncptIncrHandle)
        {
        }
    }

    private readonly AsyncWorkQueue<CommandAction> _commandQueue;

    private readonly DeviceState<ThiRegisters> _state;

    /// <summary>
    /// Creates the wrapper and starts its work queue.
    /// </summary>
    /// <param name="classId">Class ID of the wrapped device</param>
    /// <param name="device">Device receiving the method calls</param>
    /// <param name="syncptIncrMgr">Manager handling the syncpoint increments</param>
    public ThiDevice(ClassId classId, IDeviceState device, SyncptIncrManager syncptIncrMgr)
    {
        _classId = classId;
        _device = device;
        _syncptIncrMgr = syncptIncrMgr;
        _commandQueue = new AsyncWorkQueue<CommandAction>(Process, $"Ryujinx.{classId}Processor");

        var callbacks = new Dictionary<string, RwCallback>
        {
            [nameof(ThiRegisters.IncrSyncpt)] = new RwCallback(IncrSyncpt, null),
            [nameof(ThiRegisters.Method1)] = new RwCallback(Method1, null)
        };

        _state = new DeviceState<ThiRegisters>(callbacks);
    }

    /// <inheritdoc/>
    public int Read(int offset)
    {
        return _state.Read(offset);
    }

    /// <inheritdoc/>
    public void Write(int offset, int data)
    {
        _state.Write(offset, data);
    }

    // Write callback for IncrSyncpt. Bits 0-7 select the syncpoint, bits 8-15 the condition.
    private void IncrSyncpt(int data)
    {
        uint syncpointId = (uint)(data & 0xFF);
        uint cond = (uint)((data >> 8) & 0xFF); // 0 = Immediate, 1 = Done

        if (cond != 0)
        {
            // Deferred: the increment is signalled once the queue reaches this action.
            uint handle = _syncptIncrMgr.IncrementWhenDone(_classId, syncpointId);

            _commandQueue.Add(new SyncptIncrAction(handle));

            return;
        }

        _syncptIncrMgr.Increment(syncpointId);
    }

    // Write callback for Method1: queues a write of the data to the wrapped device at the
    // register selected by Method0, converted to a byte offset.
    private void Method1(int data)
    {
        int method = (int)_state.State.Method0 * 4;

        _commandQueue.Add(new MethodCallAction(method, data));
    }

    // Queue callback: executes one queued action.
    private void Process(CommandAction cmdAction)
    {
        switch (cmdAction)
        {
            case SyncptIncrAction syncptIncr:
                _syncptIncrMgr.SignalDone((uint)syncptIncr.Data);
                break;

            case MethodCallAction methodCall:
                _device.Write(methodCall.Method, methodCall.Data);
                break;
        }
    }

    /// <summary>
    /// Disposes the work queue.
    /// </summary>
    public void Dispose()
    {
        _commandQueue.Dispose();
    }
}
| } |
| @@ -0,0 +1,22 @@ | ||
| using Ryujinx.Common.Memory; | ||
|
|
||
| namespace Ryujinx.Graphics.Host1x | ||
| { | ||
/// <summary>
/// Register layout used by <c>ThiDevice</c>. The field order defines the layout and must not change.
/// </summary>
/// <remarks>
/// The byte offsets in the trailing comments assume sequential 4-byte fields with ArrayN
/// occupying N consecutive uints (TODO confirm against Ryujinx.Common.Memory); they agree with
/// the offsets embedded in the Reserved* field names.
/// </remarks>
internal struct ThiRegisters
{
    public uint IncrSyncpt;             // 0x00
    public uint Reserved4;              // 0x04
    public uint IncrSyncptErr;          // 0x08
    public uint CtxswIncrSyncpt;        // 0x0C
    public Array4<uint> Reserved10;     // 0x10
    public uint Ctxsw;                  // 0x20
    public uint Reserved24;             // 0x24
    public uint ContSyncptEof;          // 0x28
    public Array5<uint> Reserved2C;     // 0x2C
    public uint Method0;                // 0x40
    public uint Method1;                // 0x44
    public Array12<uint> Reserved48;    // 0x48
    public uint IntStatus;              // 0x78
    public uint IntMask;                // 0x7C
}
| } |
| @@ -0,0 +1,40 @@ | ||
| using Ryujinx.Graphics.Video; | ||
| using System; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.H264 | ||
| { | ||
/// <summary>
/// H.264 decoder that rebuilds the SPS/PPS headers from the picture information, prepends
/// them to the bitstream and hands the result to FFmpeg.
/// </summary>
public class Decoder : IH264Decoder
{
    private const int WorkBufferSize = 0x200;

    // Scratch buffer the reconstructed SPS/PPS headers are written into.
    private readonly byte[] _workBuffer = new byte[WorkBufferSize];

    private readonly FFmpegContext _context = new FFmpegContext();

    /// <summary>
    /// Always false for this decoder.
    /// </summary>
    public bool IsHardwareAccelerated => false;

    /// <summary>
    /// Creates a surface to decode into. The requested dimensions are not used.
    /// </summary>
    /// <param name="width">Requested width (ignored)</param>
    /// <param name="height">Requested height (ignored)</param>
    /// <returns>A new surface</returns>
    public ISurface CreateSurface(int width, int height)
    {
        return new Surface();
    }

    /// <summary>
    /// Decodes one frame.
    /// </summary>
    /// <param name="pictureInfo">Picture information used to rebuild the SPS and PPS</param>
    /// <param name="output">Surface receiving the frame; must be a surface created by this decoder</param>
    /// <param name="bitstream">Bitstream data of the frame</param>
    /// <returns>True when the decoder reported success (result code 0)</returns>
    public bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan<byte> bitstream)
    {
        Span<byte> headers = SpsAndPpsReconstruction.Reconstruct(ref pictureInfo, _workBuffer);
        Span<byte> bs = Prepend(bitstream, headers);

        int result = _context.DecodeFrame((Surface)output, bs);

        return result == 0;
    }

    // Returns a new array holding "prep" followed by "data".
    private static byte[] Prepend(ReadOnlySpan<byte> data, ReadOnlySpan<byte> prep)
    {
        byte[] output = new byte[data.Length + prep.Length];
        Span<byte> destination = output;

        prep.CopyTo(destination);
        data.CopyTo(destination.Slice(prep.Length));

        return output;
    }

    /// <summary>
    /// Disposes the FFmpeg context.
    /// </summary>
    public void Dispose()
    {
        _context.Dispose();
    }
}
| } |
| @@ -0,0 +1,51 @@ | ||
| using FFmpeg.AutoGen; | ||
| using System; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.H264 | ||
| { | ||
/// <summary>
/// Owns an FFmpeg H.264 decoder context.
/// </summary>
unsafe class FFmpegContext : IDisposable
{
    private readonly AVCodec* _codec;
    private AVCodecContext* _context;

    /// <summary>
    /// Finds the H.264 decoder, allocates a context for it and opens it.
    /// The result of opening the codec is not checked.
    /// </summary>
    public FFmpegContext()
    {
        _codec = ffmpeg.avcodec_find_decoder(AVCodecID.AV_CODEC_ID_H264);
        _context = ffmpeg.avcodec_alloc_context3(_codec);

        ffmpeg.avcodec_open2(_context, _codec, null);
    }

    /// <summary>
    /// Sends a bitstream to the decoder and receives the decoded frame.
    /// </summary>
    /// <param name="output">Surface whose frame receives the decoded picture</param>
    /// <param name="bitstream">Data to decode</param>
    /// <returns>
    /// The non-zero result of sending the packet when that fails, otherwise the result of
    /// receiving the frame
    /// </returns>
    public int DecodeFrame(Surface output, ReadOnlySpan<byte> bitstream)
    {
        int sendResult = SendPacket(bitstream);

        if (sendResult != 0)
        {
            return sendResult;
        }

        return ffmpeg.avcodec_receive_frame(_context, output.Frame);
    }

    // Wraps the bitstream in a packet and submits it. The data is pinned only for the
    // duration of the send call.
    private int SendPacket(ReadOnlySpan<byte> bitstream)
    {
        AVPacket packet;

        ffmpeg.av_init_packet(&packet);

        fixed (byte* ptr = bitstream)
        {
            packet.data = ptr;
            packet.size = bitstream.Length;

            return ffmpeg.avcodec_send_packet(_context, &packet);
        }
    }

    /// <summary>
    /// Closes and frees the codec context.
    /// </summary>
    public void Dispose()
    {
        ffmpeg.avcodec_close(_context);

        fixed (AVCodecContext** ppContext = &_context)
        {
            ffmpeg.avcodec_free_context(ppContext);
        }
    }
}
| } |
| @@ -0,0 +1,121 @@ | ||
| using System; | ||
| using System.Numerics; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.H264 | ||
| { | ||
/// <summary>
/// Bit-level writer that packs values most-significant-bit first into a caller supplied
/// byte array, with helpers for Exp-Golomb coded values.
/// </summary>
/// <remarks>
/// This is a mutable struct: pass it by reference, otherwise writes go to a copy.
/// </remarks>
struct H264BitStreamWriter
{
    // Number of bits accumulated before a byte is emitted.
    private const int BufferSize = 8;

    private readonly byte[] _workBuffer;

    private int _offset;    // Number of bytes already stored in _workBuffer.
    private int _buffer;    // Bits of the byte currently being assembled.
    private int _bufferPos; // Number of bits already placed in _buffer.

    /// <summary>
    /// Creates a writer that stores its output at the start of the given array.
    /// </summary>
    /// <param name="workBuffer">Array receiving the written bytes</param>
    public H264BitStreamWriter(byte[] workBuffer)
    {
        _workBuffer = workBuffer;
        _offset = 0;
        _buffer = 0;
        _bufferPos = 0;
    }

    /// <summary>
    /// Writes a single bit.
    /// </summary>
    /// <param name="value">Bit to write</param>
    public void WriteBit(bool value) => WriteBits(value ? 1 : 0, 1);

    /// <summary>
    /// Writes the low <paramref name="valueSize"/> bits of <paramref name="value"/>,
    /// most significant of those bits first.
    /// </summary>
    /// <param name="value">Value holding the bits</param>
    /// <param name="valueSize">Number of bits to write</param>
    public void WriteBits(int value, int valueSize)
    {
        int written = 0;

        while (written < valueSize)
        {
            int pending = valueSize - written;

            // Take as many bits as still fit in the current byte; a full byte is flushed first.
            int chunk = Math.Min(pending, GetFreeBufferBits());

            int chunkBits = (value >> (pending - chunk)) & ((1 << chunk) - 1);

            _buffer |= chunkBits << (BufferSize - _bufferPos - chunk);

            written += chunk;
            _bufferPos += chunk;
        }
    }

    // Returns the number of free bits in the current byte, flushing it first when it is full.
    private int GetFreeBufferBits()
    {
        if (_bufferPos == BufferSize)
        {
            Flush();
        }

        return BufferSize - _bufferPos;
    }

    /// <summary>
    /// Stores the byte being assembled, if it holds any bits. Unused low bits are zero.
    /// </summary>
    public void Flush()
    {
        if (_bufferPos == 0)
        {
            return;
        }

        _workBuffer[_offset++] = (byte)_buffer;

        _buffer = 0;
        _bufferPos = 0;
    }

    /// <summary>
    /// Writes a single 1 bit and flushes, padding the rest of the byte with zeros.
    /// </summary>
    public void End()
    {
        WriteBit(true);

        Flush();
    }

    /// <summary>
    /// Gets the bytes stored so far. Bits not yet flushed are not included.
    /// </summary>
    /// <returns>Span over the written part of the work buffer</returns>
    public Span<byte> AsSpan()
    {
        return new Span<byte>(_workBuffer, 0, _offset);
    }

    /// <summary>Writes an unsigned value using a fixed number of bits.</summary>
    public void WriteU(uint value, int valueSize) => WriteBits((int)value, valueSize);

    /// <summary>Writes a signed Exp-Golomb coded value.</summary>
    public void WriteSe(int value) => WriteExpGolombCodedInt(value);

    /// <summary>Writes an unsigned Exp-Golomb coded value.</summary>
    public void WriteUe(uint value) => WriteExpGolombCodedUInt(value);

    // Maps the signed value to an unsigned code (2 * |value|, minus one for positive values)
    // and writes that.
    private void WriteExpGolombCodedInt(int value)
    {
        int magnitude = value < 0 ? -value : value;
        int code = (magnitude << 1) - (value > 0 ? 1 : 0);

        WriteExpGolombCodedUInt((uint)code);
    }

    // Writes "size - 1" zero bits and a one bit, followed by the low "size - 1" bits of
    // value + 1, where size is the bit length of value + 1.
    private void WriteExpGolombCodedUInt(uint value)
    {
        int size = 32 - BitOperations.LeadingZeroCount(value + 1);

        WriteBits(1, size);

        uint suffix = value - ((1u << (size - 1)) - 1);

        WriteBits((int)suffix, size - 1);
    }
}
| } |
| @@ -0,0 +1,23 @@ | ||
| <Project Sdk="Microsoft.NET.Sdk"> | ||
|
|
||
| <PropertyGroup> | ||
| <TargetFramework>netcoreapp3.1</TargetFramework> | ||
| </PropertyGroup> | ||
|
|
||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'"> | ||
| <AllowUnsafeBlocks>true</AllowUnsafeBlocks> | ||
| </PropertyGroup> | ||
|
|
||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'"> | ||
| <AllowUnsafeBlocks>true</AllowUnsafeBlocks> | ||
| </PropertyGroup> | ||
|
|
||
| <ItemGroup> | ||
| <PackageReference Include="FFmpeg.AutoGen" Version="4.3.0" /> | ||
| </ItemGroup> | ||
|
|
||
| <ItemGroup> | ||
| <ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" /> | ||
| </ItemGroup> | ||
|
|
||
| </Project> |
| @@ -0,0 +1,159 @@ | ||
| using Ryujinx.Common.Memory; | ||
| using Ryujinx.Graphics.Video; | ||
| using System; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.H264 | ||
| { | ||
| static class SpsAndPpsReconstruction | ||
| { | ||
| public static Span<byte> Reconstruct(ref H264PictureInfo pictureInfo, byte[] workBuffer) | ||
| { | ||
| H264BitStreamWriter writer = new H264BitStreamWriter(workBuffer); | ||
|
|
||
| // Sequence Parameter Set. | ||
| writer.WriteU(1, 24); | ||
| writer.WriteU(0, 1); | ||
| writer.WriteU(3, 2); | ||
| writer.WriteU(7, 5); | ||
| writer.WriteU(100, 8); // Profile idc | ||
| writer.WriteU(0, 8); // Reserved | ||
| writer.WriteU(31, 8); // Level idc | ||
| writer.WriteUe(0); // Seq parameter set id | ||
| writer.WriteUe(pictureInfo.ChromaFormatIdc); | ||
|
|
||
| if (pictureInfo.ChromaFormatIdc == 3) | ||
| { | ||
| writer.WriteBit(false); // Separate colour plane flag | ||
| } | ||
|
|
||
| writer.WriteUe(0); // Bit depth luma minus 8 | ||
| writer.WriteUe(0); // Bit depth chroma minus 8 | ||
| writer.WriteBit(pictureInfo.QpprimeYZeroTransformBypassFlag); | ||
| writer.WriteBit(false); // Scaling matrix present flag | ||
|
|
||
| writer.WriteUe(pictureInfo.Log2MaxFrameNumMinus4); | ||
| writer.WriteUe(pictureInfo.PicOrderCntType); | ||
|
|
||
| if (pictureInfo.PicOrderCntType == 0) | ||
| { | ||
| writer.WriteUe(pictureInfo.Log2MaxPicOrderCntLsbMinus4); | ||
| } | ||
| else if (pictureInfo.PicOrderCntType == 1) | ||
| { | ||
| writer.WriteBit(pictureInfo.DeltaPicOrderAlwaysZeroFlag); | ||
|
|
||
| writer.WriteSe(0); // Offset for non-ref pic | ||
| writer.WriteSe(0); // Offset for top to bottom field | ||
| writer.WriteUe(0); // Num ref frames in pic order cnt cycle | ||
| } | ||
|
|
||
| writer.WriteUe(16); // Max num ref frames | ||
| writer.WriteBit(false); // Gaps in frame num value allowed flag | ||
| writer.WriteUe(pictureInfo.PicWidthInMbsMinus1); | ||
| writer.WriteUe(pictureInfo.PicHeightInMapUnitsMinus1); | ||
| writer.WriteBit(pictureInfo.FrameMbsOnlyFlag); | ||
|
|
||
| if (!pictureInfo.FrameMbsOnlyFlag) | ||
| { | ||
| writer.WriteBit(pictureInfo.MbAdaptiveFrameFieldFlag); | ||
| } | ||
|
|
||
| writer.WriteBit(pictureInfo.Direct8x8InferenceFlag); | ||
| writer.WriteBit(false); // Frame cropping flag | ||
| writer.WriteBit(false); // VUI parameter present flag | ||
|
|
||
| writer.End(); | ||
|
|
||
| // Picture Parameter Set. | ||
| writer.WriteU(1, 24); | ||
| writer.WriteU(0, 1); | ||
| writer.WriteU(3, 2); | ||
| writer.WriteU(8, 5); | ||
|
|
||
| writer.WriteUe(0); // Pic parameter set id | ||
| writer.WriteUe(0); // Seq parameter set id | ||
|
|
||
| writer.WriteBit(pictureInfo.EntropyCodingModeFlag); | ||
| writer.WriteBit(false); // Bottom field pic order in frame present flag | ||
| writer.WriteUe(0); // Num slice groups minus 1 | ||
| writer.WriteUe(pictureInfo.NumRefIdxL0ActiveMinus1); | ||
| writer.WriteUe(pictureInfo.NumRefIdxL1ActiveMinus1); | ||
| writer.WriteBit(pictureInfo.WeightedPredFlag); | ||
| writer.WriteU(pictureInfo.WeightedBipredIdc, 2); | ||
| writer.WriteSe(pictureInfo.PicInitQpMinus26); | ||
| writer.WriteSe(0); // Pic init qs minus 26 | ||
| writer.WriteSe(pictureInfo.ChromaQpIndexOffset); | ||
| writer.WriteBit(pictureInfo.DeblockingFilterControlPresentFlag); | ||
| writer.WriteBit(pictureInfo.ConstrainedIntraPredFlag); | ||
| writer.WriteBit(pictureInfo.RedundantPicCntPresentFlag); | ||
| writer.WriteBit(pictureInfo.Transform8x8ModeFlag); | ||
|
|
||
| writer.WriteBit(pictureInfo.ScalingMatrixPresent); | ||
|
|
||
| if (pictureInfo.ScalingMatrixPresent) | ||
| { | ||
| for (int index = 0; index < 6; index++) | ||
| { | ||
| writer.WriteBit(true); | ||
|
|
||
| WriteScalingList(ref writer, pictureInfo.ScalingLists4x4[index]); | ||
| } | ||
|
|
||
| if (pictureInfo.Transform8x8ModeFlag) | ||
| { | ||
| for (int index = 0; index < 2; index++) | ||
| { | ||
| writer.WriteBit(true); | ||
|
|
||
| WriteScalingList(ref writer, pictureInfo.ScalingLists8x8[index]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| writer.WriteSe(pictureInfo.SecondChromaQpIndexOffset); | ||
|
|
||
| writer.End(); | ||
|
|
||
| return writer.AsSpan(); | ||
| } | ||
|
|
||
| // ZigZag LUTs from libavcodec. | ||
| private static readonly byte[] ZigZagDirect = new byte[] /* 64-entry scan table: WriteScalingList emits list[ZigZagDirect[k]] as its k-th value for every list whose length is not 16. */ | ||
| { | ||
| 0, 1, 8, 16, 9, 2, 3, 10, | ||
| 17, 24, 32, 25, 18, 11, 4, 5, | ||
| 12, 19, 26, 33, 40, 48, 41, 34, | ||
| 27, 20, 13, 6, 7, 14, 21, 28, | ||
| 35, 42, 49, 56, 57, 50, 43, 36, | ||
| 29, 22, 15, 23, 30, 37, 44, 51, | ||
| 58, 59, 52, 45, 38, 31, 39, 46, | ||
| 53, 60, 61, 54, 47, 55, 62, 63 | ||
| }; | ||
|
|
||
| private static readonly byte[] ZigZagScan = new byte[] /* 16-entry scan table used by WriteScalingList for lists of length 16; each entry is spelled a + b * 4 (presumably column + row * 4 of a 4x4 block - confirm). */ | ||
| { | ||
| 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, | ||
| 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, | ||
| 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, | ||
| 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4 | ||
| }; | ||
|
|
||
| /// <summary> | ||
| /// Writes one scaling list to the bitstream as a run of signed Exp-Golomb deltas, | ||
| /// visiting the entries in zig-zag order (4x4 table for 16-entry lists, 8x8 table otherwise). | ||
| /// </summary> | ||
| /// <param name="writer">Bitstream writer that receives the deltas</param> | ||
| /// <param name="list">Scaling list to encode; it is indexed through the zig-zag table</param> | ||
| private static void WriteScalingList(ref H264BitStreamWriter writer, IArray<byte> list) | ||
| { | ||
| byte[] scan = list.Length == 16 ? ZigZagScan : ZigZagDirect; | ||
|
|
||
| // The H.264 syntax starts every scaling list from a previous scale of 8. | ||
| int lastScale = 8; | ||
|
|
||
| for (int index = 0; index < list.Length; index++) | ||
| { | ||
| byte value = list[scan[index]]; | ||
|
|
||
| if (value == 0) | ||
| { | ||
| // A reconstructed scale of zero tells the decoder to stop reading deltas for this list, | ||
| // so emit the delta that produces zero and write nothing after it. Any further delta | ||
| // would be parsed as the next syntax element and corrupt the rest of the parameter set. | ||
| writer.WriteSe(unchecked((sbyte)(-lastScale))); | ||
|
|
||
| break; | ||
| } | ||
|
|
||
| // delta_scale must stay within [-128, 127]. The decoder rebuilds the scale modulo 256, | ||
| // so wrapping the difference to a signed byte encodes the same value inside the legal range. | ||
| int deltaScale = unchecked((sbyte)(value - lastScale)); | ||
|
|
||
| writer.WriteSe(deltaScale); | ||
|
|
||
| lastScale = value; | ||
| } | ||
| } | ||
| } | ||
| } |
| @@ -0,0 +1,33 @@ | ||
| using FFmpeg.AutoGen; | ||
| using Ryujinx.Graphics.Video; | ||
| using System; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.H264 | ||
| { | ||
| /// <summary> | ||
| /// Surface backed by an FFmpeg <see cref="AVFrame"/> that is allocated on construction | ||
| /// and released on <see cref="Dispose"/>. | ||
| /// </summary> | ||
| unsafe class Surface : ISurface | ||
| { | ||
| public AVFrame* Frame { get; } | ||
|
|
||
| public Plane YPlane | ||
| { | ||
| get { return PlaneAt(0, Stride * Height); } | ||
| } | ||
|
|
||
| public Plane UPlane | ||
| { | ||
| get { return PlaneAt(1, UvStride * UvHeight); } | ||
| } | ||
|
|
||
| public Plane VPlane | ||
| { | ||
| get { return PlaneAt(2, UvStride * UvHeight); } | ||
| } | ||
|
|
||
| public int Width | ||
| { | ||
| get { return Frame->width; } | ||
| } | ||
|
|
||
| public int Height | ||
| { | ||
| get { return Frame->height; } | ||
| } | ||
|
|
||
| public int Stride | ||
| { | ||
| get { return Frame->linesize[0]; } | ||
| } | ||
|
|
||
| // The chroma dimensions are the frame dimensions halved, rounding up. | ||
| public int UvWidth | ||
| { | ||
| get { return (Width + 1) >> 1; } | ||
| } | ||
|
|
||
| public int UvHeight | ||
| { | ||
| get { return (Height + 1) >> 1; } | ||
| } | ||
|
|
||
| public int UvStride | ||
| { | ||
| get { return Frame->linesize[1]; } | ||
| } | ||
|
|
||
| public Surface() => Frame = ffmpeg.av_frame_alloc(); | ||
|
|
||
| // Wraps the data pointer of the given frame plane together with its length. | ||
| private Plane PlaneAt(uint index, int length) | ||
| { | ||
| return new Plane((IntPtr)Frame->data[index], length); | ||
| } | ||
|
|
||
| public void Dispose() | ||
| { | ||
| AVFrame* frame = Frame; | ||
|
|
||
| ffmpeg.av_frame_unref(frame); | ||
| ffmpeg.av_free(frame); | ||
| } | ||
| } | ||
| } |
| @@ -0,0 +1,9 @@ | ||
| namespace Ryujinx.Graphics.Nvdec.Vp9 | ||
| { | ||
| /* Sample bit depths; each member's numeric value is the number of bits it names. */ internal enum BitDepth | ||
| { | ||
| Bits8 = 8, /**< 8 bits */ | ||
| Bits10 = 10, /**< 10 bits */ | ||
| Bits12 = 12, /**< 12 bits */ | ||
| } | ||
| } |
| @@ -0,0 +1,56 @@ | ||
| namespace Ryujinx.Graphics.Nvdec.Vp9 | ||
| { | ||
| /* Codec result codes. No member has an explicit value, so they are numbered | ||
| from 0 (CodecOk) upwards in declaration order. */ internal enum CodecErr | ||
| { | ||
| /*!\brief Operation completed without error */ | ||
| CodecOk, | ||
|
|
||
| /*!\brief Unspecified error */ | ||
| CodecError, | ||
|
|
||
| /*!\brief Memory operation failed */ | ||
| CodecMemError, | ||
|
|
||
| /*!\brief ABI version mismatch */ | ||
| CodecAbiMismatch, | ||
|
|
||
| /*!\brief Algorithm does not have required capability */ | ||
| CodecIncapable, | ||
|
|
||
| /*!\brief The given bitstream is not supported. | ||
| * | ||
| * The bitstream was unable to be parsed at the highest level. The decoder | ||
| * is unable to proceed. This error SHOULD be treated as fatal to the | ||
| * stream. */ | ||
| CodecUnsupBitstream, | ||
|
|
||
| /*!\brief Encoded bitstream uses an unsupported feature | ||
| * | ||
| * The decoder does not implement a feature required by the encoder. This | ||
| * return code should only be used for features that prevent future | ||
| * pictures from being properly decoded. This error MAY be treated as | ||
| * fatal to the stream or MAY be treated as fatal to the current GOP. | ||
| */ | ||
| CodecUnsupFeature, | ||
|
|
||
| /*!\brief The coded data for this stream is corrupt or incomplete | ||
| * | ||
| * There was a problem decoding the current frame. This return code | ||
| * should only be used for failures that prevent future pictures from | ||
| * being properly decoded. This error MAY be treated as fatal to the | ||
| * stream or MAY be treated as fatal to the current GOP. If decoding | ||
| * is continued for the current GOP, artifacts may be present. | ||
| */ | ||
| CodecCorruptFrame, | ||
|
|
||
| /*!\brief An application-supplied parameter is not valid. | ||
| * | ||
| */ | ||
| CodecInvalidParam, | ||
|
|
||
| /*!\brief An iterator reached the end of list. | ||
| * | ||
| */ | ||
| CodecListEnd | ||
| } | ||
| } |
| @@ -0,0 +1,59 @@ | ||
| using System; | ||
| using System.Diagnostics; | ||
| using System.Numerics; | ||
| using System.Runtime.CompilerServices; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.Vp9.Common | ||
| { | ||
| /// <summary> | ||
| /// Integer clamping, rounding, alignment and bit-counting helpers. | ||
| /// </summary> | ||
| internal static class BitUtils | ||
| { | ||
| /// <summary>Clamps <paramref name="val"/> to the 0..255 range.</summary> | ||
| // FIXME: Enable inlining here after AVX2 gather bug is fixed. | ||
| // [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| public static byte ClipPixel(int val) | ||
| { | ||
| if (val > 255) | ||
| { | ||
| return 255; | ||
| } | ||
|
|
||
| if (val < 0) | ||
| { | ||
| return 0; | ||
| } | ||
|
|
||
| return (byte)val; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Clamps <paramref name="val"/> to 0..1023 for a bit depth of 10, 0..4095 for 12, | ||
| /// and 0..255 for any other bit depth. | ||
| /// </summary> | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| public static ushort ClipPixelHighbd(int val, int bd) | ||
| { | ||
| int maxValue = bd switch | ||
| { | ||
| 10 => 1023, | ||
| 12 => 4095, | ||
| _ => 255 | ||
| }; | ||
|
|
||
| return (ushort)Math.Clamp(val, 0, maxValue); | ||
| } | ||
|
|
||
| /// <summary>Adds half of 2^n to <paramref name="value"/>, then shifts right by n.</summary> | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| public static int RoundPowerOfTwo(int value, int n) | ||
| { | ||
| int half = 1 << (n - 1); | ||
|
|
||
| return (value + half) >> n; | ||
| } | ||
|
|
||
| /// <summary>64-bit variant of <see cref="RoundPowerOfTwo(int, int)"/>.</summary> | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| public static long RoundPowerOfTwo(long value, int n) | ||
| { | ||
| long half = 1L << (n - 1); | ||
|
|
||
| return (value + half) >> n; | ||
| } | ||
|
|
||
| /// <summary>Rounds <paramref name="value"/> up to the next multiple of 2^n.</summary> | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| public static int AlignPowerOfTwo(int value, int n) | ||
| { | ||
| int mask = (1 << n) - 1; | ||
|
|
||
| return (value + mask) & ~mask; | ||
| } | ||
|
|
||
| // Index of the highest set bit; callers must pass a non-zero value. | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static int GetMsb(uint n) | ||
| { | ||
| Debug.Assert(n != 0); | ||
|
|
||
| int leadingZeros = BitOperations.LeadingZeroCount(n); | ||
|
|
||
| return 31 ^ leadingZeros; | ||
| } | ||
|
|
||
| /// <summary>Returns the index of the highest set bit plus one, or 0 when the input is 0.</summary> | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| public static int GetUnsignedBits(uint numValues) | ||
| { | ||
| if (numValues == 0) | ||
| { | ||
| return 0; | ||
| } | ||
|
|
||
| return GetMsb(numValues) + 1; | ||
| } | ||
| } | ||
| } |
| @@ -0,0 +1,94 @@ | ||
| using Ryujinx.Common.Memory; | ||
| using System; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Runtime.InteropServices; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.Vp9.Common | ||
| { | ||
| /// <summary> | ||
| /// Pool of unmanaged buffers. A buffer handed back through <see cref="Free{T}"/> stays cached | ||
| /// and is reused by a later <see cref="Allocate{T}"/> call that asks for exactly the same byte size. | ||
| /// All cached memory is released by <see cref="Dispose"/>. | ||
| /// </summary> | ||
| internal class MemoryAllocator : IDisposable | ||
| { | ||
| // Initial number of pool slots; the pool grows by this amount whenever every slot is in use. | ||
| private const int PoolEntries = 10; | ||
|
|
||
| private struct PoolItem | ||
| { | ||
| public IntPtr Pointer; | ||
| public int Length; | ||
| public bool InUse; | ||
| } | ||
|
|
||
| private PoolItem[] _pool = new PoolItem[PoolEntries]; | ||
|
|
||
| /// <summary> | ||
| /// Returns a buffer with room for <paramref name="length"/> elements of <typeparamref name="T"/>. | ||
| /// </summary> | ||
| /// <param name="length">Number of elements requested</param> | ||
| /// <returns>Pointer wrapper over a recycled or newly allocated unmanaged buffer</returns> | ||
| public ArrayPtr<T> Allocate<T>(int length) where T : unmanaged | ||
| { | ||
| int lengthInBytes = Unsafe.SizeOf<T>() * length; | ||
|
|
||
| // First choice: an idle cached buffer of exactly the requested size. | ||
| // Empty slots (null pointer, length 0) are skipped so that a zero-byte request | ||
| // cannot claim a slot that holds no buffer and leave it marked in-use forever. | ||
| for (int i = 0; i < _pool.Length; i++) | ||
| { | ||
| ref PoolItem cached = ref _pool[i]; | ||
|
|
||
| if (!cached.InUse && cached.Pointer != IntPtr.Zero && cached.Length == lengthInBytes) | ||
| { | ||
| cached.InUse = true; | ||
|
|
||
| return new ArrayPtr<T>(cached.Pointer, length); | ||
| } | ||
| } | ||
|
|
||
| IntPtr ptr = Marshal.AllocHGlobal(lengthInBytes); | ||
|
|
||
| int slot = FindIdleSlot(); | ||
|
|
||
| if (slot < 0) | ||
| { | ||
| // Every slot is in use. Grow the pool so the new buffer is still tracked; | ||
| // an untracked buffer could never be released by Free or Dispose. | ||
| slot = _pool.Length; | ||
| Array.Resize(ref _pool, slot + PoolEntries); | ||
| } | ||
|
|
||
| ref PoolItem item = ref _pool[slot]; | ||
|
|
||
| // The slot may still cache an idle buffer of another size; release it before overwriting. | ||
| if (item.Pointer != IntPtr.Zero) | ||
| { | ||
| Marshal.FreeHGlobal(item.Pointer); | ||
| } | ||
|
|
||
| item.Pointer = ptr; | ||
| item.Length = lengthInBytes; | ||
| item.InUse = true; | ||
|
|
||
| return new ArrayPtr<T>(ptr, length); | ||
| } | ||
|
|
||
| // Index of the first slot that is not in use, or -1 when the pool is full. | ||
| private int FindIdleSlot() | ||
| { | ||
| for (int i = 0; i < _pool.Length; i++) | ||
| { | ||
| if (!_pool[i].InUse) | ||
| { | ||
| return i; | ||
| } | ||
| } | ||
|
|
||
| return -1; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Marks the buffer behind <paramref name="arr"/> as idle so that it can be reused. | ||
| /// The memory stays cached until it is evicted by a later allocation or the allocator is disposed. | ||
| /// </summary> | ||
| /// <param name="arr">Buffer previously returned by <see cref="Allocate{T}"/></param> | ||
| public unsafe void Free<T>(ArrayPtr<T> arr) where T : unmanaged | ||
| { | ||
| IntPtr ptr = (IntPtr)arr.ToPointer(); | ||
|
|
||
| if (ptr == IntPtr.Zero) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| for (int i = 0; i < _pool.Length; i++) | ||
| { | ||
| ref PoolItem item = ref _pool[i]; | ||
|
|
||
| if (item.Pointer == ptr) | ||
| { | ||
| item.InUse = false; | ||
| break; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Releases every buffer still held by the pool, whether or not it is marked in use. | ||
| /// </summary> | ||
| public void Dispose() | ||
| { | ||
| for (int i = 0; i < _pool.Length; i++) | ||
| { | ||
| ref PoolItem item = ref _pool[i]; | ||
|
|
||
| if (item.Pointer != IntPtr.Zero) | ||
| { | ||
| Marshal.FreeHGlobal(item.Pointer); | ||
| } | ||
|
|
||
| // Reset the whole slot so no stale length or in-use flag survives the release. | ||
| item = default; | ||
| } | ||
| } | ||
| } | ||
| } |
| @@ -0,0 +1,25 @@ | ||
| using Ryujinx.Common.Memory; | ||
| using System; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Runtime.InteropServices; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.Vp9.Common | ||
| { | ||
| /// <summary> | ||
| /// Copy and fill helpers for unmanaged values and raw pointers. | ||
| /// </summary> | ||
| internal static class MemoryUtil | ||
| { | ||
| /// <summary>Copies <paramref name="length"/> elements from <paramref name="source"/> to <paramref name="dest"/>.</summary> | ||
| public static unsafe void Copy<T>(T* dest, T* source, int length) where T : unmanaged | ||
| { | ||
| Span<T> from = new Span<T>(source, length); | ||
| Span<T> to = new Span<T>(dest, length); | ||
|
|
||
| from.CopyTo(to); | ||
| } | ||
|
|
||
| /// <summary>Copies the single value referenced by <paramref name="source"/> into <paramref name="dest"/>.</summary> | ||
| public static void Copy<T>(ref T dest, ref T source) where T : unmanaged | ||
| { | ||
| // For an unmanaged T, a one-element span copy is a plain value assignment. | ||
| dest = source; | ||
| } | ||
|
|
||
| /// <summary>Sets <paramref name="length"/> elements starting at <paramref name="ptr"/> to <paramref name="value"/>.</summary> | ||
| public static unsafe void Fill<T>(T* ptr, T value, int length) where T : unmanaged | ||
| { | ||
| Span<T> target = new Span<T>(ptr, length); | ||
|
|
||
| target.Fill(value); | ||
| } | ||
| } | ||
| } |
| @@ -0,0 +1,71 @@ | ||
| using Ryujinx.Graphics.Nvdec.Vp9.Types; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.Vp9 | ||
| { | ||
| /* Compile-time constants declared for the Ryujinx.Graphics.Nvdec.Vp9 namespace. */ internal static class Constants | ||
| { | ||
| public const int Vp9InterpExtend = 4; | ||
|
|
||
| public const int MaxMbPlane = 3; | ||
|
|
||
| public const int None = -1; /* None..AltRefFrame look like reference-frame identifiers - TODO confirm against the code that stores them */ | ||
| public const int IntraFrame = 0; | ||
| public const int LastFrame = 1; | ||
| public const int GoldenFrame = 2; | ||
| public const int AltRefFrame = 3; | ||
| public const int MaxRefFrames = 4; | ||
|
|
||
| public const int MiSizeLog2 = 3; | ||
| public const int MiBlockSizeLog2 = 6 - MiSizeLog2; // 64 = 2^6 | ||
|
|
||
| public const int MiSize = 1 << MiSizeLog2; // pixels per mi-unit | ||
| public const int MiBlockSize = 1 << MiBlockSizeLog2; // mi-units per max block | ||
| public const int MiMask = MiBlockSize - 1; /* evaluates to 7 */ | ||
|
|
||
| public const int PartitionPloffset = 4; // number of probability models per block size | ||
|
|
||
| /* Motion vector reference candidates (presumably the candidate list size; confirm against its users) */ | ||
| public const int MaxMvRefCandidates = 2; | ||
|
|
||
| public const int CompInterContexts = 5; | ||
| public const int RefContexts = 5; | ||
|
|
||
| public const int EightTap = 0; | ||
| public const int EightTapSmooth = 1; | ||
| public const int EightTapSharp = 2; | ||
| public const int SwitchableFilters = 3; /* Number of switchable filters */ | ||
| public const int Bilinear = 3; | ||
| public const int Switchable = 4; /* should be the last one */ | ||
|
|
||
| // Frame | ||
| public const int RefsPerFrame = 3; | ||
|
|
||
| public const int NumPingPongBuffers = 2; | ||
|
|
||
| public const int Class0Bits = 1; /* bits at integer precision for class 0 */ | ||
| public const int Class0Size = 1 << Class0Bits; /* evaluates to 2 */ | ||
|
|
||
| public const int MvInUseBits = 14; | ||
| public const int MvUpp = (1 << MvInUseBits) - 1; /* evaluates to 16383 */ | ||
| public const int MvLow = -(1 << MvInUseBits); /* evaluates to -16384 */ | ||
|
|
||
| // Coefficient token alphabet | ||
| public const int ZeroToken = 0; // 0 Extra Bits 0+0 | ||
| public const int OneToken = 1; // 1 Extra Bits 0+1 | ||
| public const int TwoToken = 2; // 2 Extra Bits 0+1 | ||
|
|
||
| public const int PivotNode = 2; | ||
|
|
||
| public const int Cat1MinVal = 5; /* CatNMinVal: base value of category N; Detokenize.DecodeCoefs adds the extra bits it reads (1 bit for category 1) */ | ||
| public const int Cat2MinVal = 7; /* plus 2 extra bits */ | ||
| public const int Cat3MinVal = 11; /* plus 3 extra bits */ | ||
| public const int Cat4MinVal = 19; /* plus 4 extra bits */ | ||
| public const int Cat5MinVal = 35; /* plus 5 extra bits */ | ||
| public const int Cat6MinVal = 67; /* plus 14, 16 or 18 extra bits depending on bit depth */ | ||
|
|
||
| public const int EobModelToken = 3; | ||
|
|
||
| public const int SegmentAbsData = 1; | ||
| public const int MaxSegments = 8; | ||
| } | ||
| } |
| @@ -0,0 +1,164 @@ | ||
| using Ryujinx.Common.Memory; | ||
| using Ryujinx.Graphics.Nvdec.Vp9.Common; | ||
| using Ryujinx.Graphics.Nvdec.Vp9.Types; | ||
| using Ryujinx.Graphics.Video; | ||
| using System; | ||
| using Vp9MvRef = Ryujinx.Graphics.Video.Vp9MvRef; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.Vp9 | ||
| { | ||
| /// <summary> | ||
| /// Software (non hardware-accelerated) VP9 frame decoder. | ||
| /// </summary> | ||
| public class Decoder : IVp9Decoder | ||
| { | ||
| public bool IsHardwareAccelerated => false; | ||
|
|
||
| // Pool that provides the per-frame working buffers; released by Dispose. | ||
| private readonly MemoryAllocator _allocator = new MemoryAllocator(); | ||
|
|
||
| public ISurface CreateSurface(int width, int height) => new Surface(width, height); | ||
|
|
||
| // Maps the interpolation filter value from the picture info to the decoder's filter constants. | ||
| private static readonly byte[] LiteralToFilter = new byte[] | ||
| { | ||
| Constants.EightTapSmooth, | ||
| Constants.EightTap, | ||
| Constants.EightTapSharp, | ||
| Constants.Bilinear | ||
| }; | ||
|
|
||
| /// <summary> | ||
| /// Decodes one frame from <paramref name="bitstream"/> into <paramref name="output"/>. | ||
| /// </summary> | ||
| /// <param name="pictureInfo">Frame parameters; its entropy and backward update count fields are referenced by the decoder state</param> | ||
| /// <param name="output">Surface that receives the decoded frame</param> | ||
| /// <param name="bitstream">Compressed frame data</param> | ||
| /// <param name="mvsIn">Motion vectors of the previous frame</param> | ||
| /// <param name="mvsOut">Receives the motion vectors of the decoded frame</param> | ||
| /// <returns>True on success, false if tile decoding raised an <see cref="InternalErrorException"/></returns> | ||
| public unsafe bool Decode( | ||
| ref Vp9PictureInfo pictureInfo, | ||
| ISurface output, | ||
| ReadOnlySpan<byte> bitstream, | ||
| ReadOnlySpan<Vp9MvRef> mvsIn, | ||
| Span<Vp9MvRef> mvsOut) | ||
| { | ||
| Vp9Common cm = new Vp9Common(); | ||
|
|
||
| cm.FrameType = pictureInfo.IsKeyFrame ? FrameType.KeyFrame : FrameType.InterFrame; | ||
| cm.IntraOnly = pictureInfo.IntraOnly; | ||
|
|
||
| cm.Width = output.Width; | ||
| cm.Height = output.Height; | ||
|
|
||
| cm.UsePrevFrameMvs = pictureInfo.UsePrevInFindMvRefs; | ||
|
|
||
| cm.RefFrameSignBias = pictureInfo.RefFrameSignBias; | ||
|
|
||
| cm.BaseQindex = pictureInfo.BaseQIndex; | ||
| cm.YDcDeltaQ = pictureInfo.YDcDeltaQ; | ||
| cm.UvAcDeltaQ = pictureInfo.UvAcDeltaQ; | ||
| cm.UvDcDeltaQ = pictureInfo.UvDcDeltaQ; | ||
|
|
||
| cm.Mb.Lossless = pictureInfo.Lossless; | ||
|
|
||
| cm.TxMode = (TxMode)pictureInfo.TransformMode; | ||
|
|
||
| cm.AllowHighPrecisionMv = pictureInfo.AllowHighPrecisionMv; | ||
|
|
||
| cm.InterpFilter = (byte)pictureInfo.InterpFilter; | ||
|
|
||
| if (cm.InterpFilter != Constants.Switchable) | ||
| { | ||
| cm.InterpFilter = LiteralToFilter[cm.InterpFilter]; | ||
| } | ||
|
|
||
| cm.ReferenceMode = (ReferenceMode)pictureInfo.ReferenceMode; | ||
|
|
||
| cm.CompFixedRef = pictureInfo.CompFixedRef; | ||
| cm.CompVarRef = pictureInfo.CompVarRef; | ||
|
|
||
| cm.Log2TileCols = pictureInfo.Log2TileCols; | ||
| cm.Log2TileRows = pictureInfo.Log2TileRows; | ||
|
|
||
| cm.Seg.Enabled = pictureInfo.SegmentEnabled; | ||
| cm.Seg.UpdateMap = pictureInfo.SegmentMapUpdate; | ||
| cm.Seg.TemporalUpdate = pictureInfo.SegmentMapTemporalUpdate; | ||
| cm.Seg.AbsDelta = (byte)pictureInfo.SegmentAbsDelta; | ||
| cm.Seg.FeatureMask = pictureInfo.SegmentFeatureEnable; | ||
| cm.Seg.FeatureData = pictureInfo.SegmentFeatureData; | ||
|
|
||
| cm.Lf.ModeRefDeltaEnabled = pictureInfo.ModeRefDeltaEnabled; | ||
| cm.Lf.RefDeltas = pictureInfo.RefDeltas; | ||
| cm.Lf.ModeDeltas = pictureInfo.ModeDeltas; | ||
|
|
||
| cm.Fc = new Ptr<Vp9EntropyProbs>(ref pictureInfo.Entropy); | ||
| cm.Counts = new Ptr<Vp9BackwardUpdates>(ref pictureInfo.BackwardUpdateCounts); | ||
|
|
||
| cm.FrameRefs[0].Buf = (Surface)pictureInfo.LastReference; | ||
| cm.FrameRefs[1].Buf = (Surface)pictureInfo.GoldenReference; | ||
| cm.FrameRefs[2].Buf = (Surface)pictureInfo.AltReference; | ||
| cm.Mb.CurBuf = (Surface)output; | ||
|
|
||
| cm.Mb.SetupBlockPlanes(1, 1); | ||
|
|
||
| cm.AllocTileWorkerData(_allocator, 1 << pictureInfo.Log2TileCols, 1 << pictureInfo.Log2TileRows); | ||
| cm.AllocContextBuffers(_allocator, output.Width, output.Height); | ||
|
|
||
| try | ||
| { | ||
| cm.InitContextBuffers(); | ||
| cm.SetupSegmentationDequant(); | ||
| cm.SetupScaleFactors(); | ||
|
|
||
| SetMvs(ref cm, mvsIn); | ||
|
|
||
| fixed (byte* dataPtr = bitstream) | ||
| { | ||
| try | ||
| { | ||
| DecodeFrame.DecodeTiles(ref cm, new ArrayPtr<byte>(dataPtr, bitstream.Length)); | ||
| } | ||
| catch (InternalErrorException) | ||
| { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| GetMvs(ref cm, mvsOut); | ||
|
|
||
| return true; | ||
| } | ||
| finally | ||
| { | ||
| // Hand the working buffers back on every exit path. A failed decode or a size mismatch | ||
| // in SetMvs/GetMvs would otherwise leave them marked as in use in the allocator. | ||
| cm.FreeTileWorkerData(_allocator); | ||
| cm.FreeContextBuffers(_allocator); | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Copies the caller's previous-frame motion vectors into the decoder state. | ||
| /// </summary> | ||
| /// <exception cref="ArgumentException">The input holds more entries than the decoder state</exception> | ||
| private static void SetMvs(ref Vp9Common cm, ReadOnlySpan<Vp9MvRef> mvs) | ||
| { | ||
| if (mvs.Length > cm.PrevFrameMvs.Length) | ||
| { | ||
| throw new ArgumentException($"Size mismatch, expected: {cm.PrevFrameMvs.Length}, but got: {mvs.Length}."); | ||
| } | ||
|
|
||
| for (int i = 0; i < mvs.Length; i++) | ||
| { | ||
| ref var mv = ref cm.PrevFrameMvs[i]; | ||
|
|
||
| mv.Mv[0].Row = mvs[i].Mvs[0].Row; | ||
| mv.Mv[0].Col = mvs[i].Mvs[0].Col; | ||
| mv.Mv[1].Row = mvs[i].Mvs[1].Row; | ||
| mv.Mv[1].Col = mvs[i].Mvs[1].Col; | ||
|
|
||
| mv.RefFrame[0] = (sbyte)mvs[i].RefFrames[0]; | ||
| mv.RefFrame[1] = (sbyte)mvs[i].RefFrames[1]; | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Copies the motion vectors of the decoded frame from the decoder state to the caller's buffer. | ||
| /// </summary> | ||
| /// <exception cref="ArgumentException">The output holds more entries than the decoder state</exception> | ||
| private static void GetMvs(ref Vp9Common cm, Span<Vp9MvRef> mvs) | ||
| { | ||
| if (mvs.Length > cm.CurFrameMvs.Length) | ||
| { | ||
| throw new ArgumentException($"Size mismatch, expected: {cm.CurFrameMvs.Length}, but got: {mvs.Length}."); | ||
| } | ||
|
|
||
| for (int i = 0; i < mvs.Length; i++) | ||
| { | ||
| ref var mv = ref cm.CurFrameMvs[i]; | ||
|
|
||
| mvs[i].Mvs[0].Row = mv.Mv[0].Row; | ||
| mvs[i].Mvs[0].Col = mv.Mv[0].Col; | ||
| mvs[i].Mvs[1].Row = mv.Mv[1].Row; | ||
| mvs[i].Mvs[1].Col = mv.Mv[1].Col; | ||
|
|
||
| mvs[i].RefFrames[0] = mv.RefFrame[0]; | ||
| mvs[i].RefFrames[1] = mv.RefFrame[1]; | ||
| } | ||
| } | ||
|
|
||
| public void Dispose() => _allocator.Dispose(); | ||
| } | ||
| } |
| @@ -0,0 +1,325 @@ | ||
| using Ryujinx.Common.Memory; | ||
| using Ryujinx.Graphics.Nvdec.Vp9.Dsp; | ||
| using Ryujinx.Graphics.Nvdec.Vp9.Types; | ||
| using Ryujinx.Graphics.Video; | ||
| using System; | ||
| using System.Diagnostics; | ||
| using System.Runtime.InteropServices; | ||
| using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.Vp9 | ||
| { | ||
| internal static class Detokenize | ||
| { | ||
| private const int EobContextNode = 0; /* index into a coefficient probability triple: DecodeCoefs reads prob[EobContextNode] for the end-of-block decision */ | ||
| private const int ZeroContextNode = 1; /* index read by DecodeCoefs to decide whether the next token is a zero token */ | ||
| private const int OneContextNode = 2; /* index read by DecodeCoefs to tell a one token from the larger tokens */ | ||
|
|
||
| // Averages (rounding up) the cached token values of the two neighbors listed for scan position c. | ||
| private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c) | ||
| { | ||
| const int NeighborsPerPosition = 2; | ||
|
|
||
| int baseIndex = NeighborsPerPosition * c; | ||
| int first = tokenCache[neighbors[baseIndex + 0]]; | ||
| int second = tokenCache[neighbors[baseIndex + 1]]; | ||
|
|
||
| return (1 + first + second) >> 1; | ||
| } | ||
|
|
||
| // Reads n bits, most significant first, decoding bit k with probability probs[k]. | ||
| private static int ReadCoeff( | ||
| ref Reader r, | ||
| ReadOnlySpan<byte> probs, | ||
| int n, | ||
| ref ulong value, | ||
| ref int count, | ||
| ref uint range) | ||
| { | ||
| int result = 0; | ||
|
|
||
| for (int bit = 0; bit < n; bit++) | ||
| { | ||
| result <<= 1; | ||
| result |= r.ReadBool(probs[bit], ref value, ref count, ref range); | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| /* Decodes the coefficient tokens of one transform block. | ||
| Starting at scan position 0 it reads tokens until an end-of-block decision or until | ||
| maxEob positions are consumed, and stores each dequantized, sign-applied value at | ||
| dqcoeff[scan[c]]. When xd.Counts is not null the matching EobBranch/Coef counters are incremented. | ||
| The reader state is worked on in the locals value/range/count and written back to r before returning. | ||
| Returns c, the scan position at which decoding stopped. */ private static int DecodeCoefs( | ||
| ref MacroBlockD xd, | ||
| PlaneType type, | ||
| Span<int> dqcoeff, | ||
| TxSize txSize, | ||
| ref Array2<short> dq, | ||
| int ctx, | ||
| ReadOnlySpan<short> scan, | ||
| ReadOnlySpan<short> nb, | ||
| ref Reader r) | ||
| { | ||
| ref Vp9BackwardUpdates counts = ref xd.Counts.Value; | ||
| int maxEob = 16 << ((int)txSize << 1); /* 16 positions for txSize 0, four times more for each following size */ | ||
| ref Vp9EntropyProbs fc = ref xd.Fc.Value; | ||
| int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0; | ||
| int band, c = 0; | ||
| ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr]; | ||
| Span<byte> tokenCache = stackalloc byte[32 * 32]; /* one entry per position of the largest (32x32) transform */ | ||
| ReadOnlySpan<byte> bandTranslate = Luts.get_band_translate(txSize); | ||
| int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0; /* 32x32 results are shifted right by one extra bit */ | ||
| int v; | ||
| short dqv = dq[0]; /* dq[0] applies to the first coefficient; dq[1] is used after the first position */ | ||
| ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12) | ||
| ? Luts.Vp9Cat6ProbHigh12 | ||
| : (xd.Bd == 10) ? new ReadOnlySpan<byte>(Luts.Vp9Cat6ProbHigh12).Slice(2) : Luts.Vp9Cat6Prob; | ||
| int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14; | ||
| // Keep value, range, and count as locals. The compiler produces better | ||
| // results with the locals than using r directly. | ||
| ulong value = r.Value; | ||
| uint range = r.Range; | ||
| int count = r.Count; | ||
|
|
||
| while (c < maxEob) | ||
| { | ||
| int val = -1; | ||
| band = bandTranslate[0]; | ||
| bandTranslate = bandTranslate.Slice(1); | ||
| ref Array3<byte> prob = ref coefProbs[band][ctx]; | ||
| if (!xd.Counts.IsNull) | ||
| { | ||
| ++counts.EobBranch[(int)txSize][(int)type][refr][band][ctx]; | ||
| } | ||
|
|
||
| if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0) /* end of block: no more coefficients */ | ||
| { | ||
| if (!xd.Counts.IsNull) | ||
| { | ||
| ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.EobModelToken]; | ||
| } | ||
|
|
||
| break; | ||
| } | ||
|
|
||
| while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0) /* run of zero tokens */ | ||
| { | ||
| if (!xd.Counts.IsNull) | ||
| { | ||
| ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.ZeroToken]; | ||
| } | ||
|
|
||
| dqv = dq[1]; | ||
| tokenCache[scan[c]] = 0; | ||
| ++c; | ||
| if (c >= maxEob) | ||
| { | ||
| r.Value = value; | ||
| r.Range = range; | ||
| r.Count = count; | ||
| return c; // Zero tokens at the end (no eob token) | ||
| } | ||
| ctx = GetCoefContext(nb, tokenCache, c); | ||
| band = bandTranslate[0]; | ||
| bandTranslate = bandTranslate.Slice(1); | ||
| prob = ref coefProbs[band][ctx]; | ||
| } | ||
|
|
||
| if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0) /* magnitude is two or more */ | ||
| { | ||
| ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1]; | ||
| if (!xd.Counts.IsNull) | ||
| { | ||
| ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken]; | ||
| } | ||
|
|
||
| if (r.ReadBool(p[0], ref value, ref count, ref range) != 0) | ||
| { | ||
| if (r.ReadBool(p[3], ref value, ref count, ref range) != 0) | ||
| { | ||
| tokenCache[scan[c]] = 5; | ||
| if (r.ReadBool(p[5], ref value, ref count, ref range) != 0) | ||
| { | ||
| if (r.ReadBool(p[7], ref value, ref count, ref range) != 0) | ||
| { | ||
| val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range); | ||
| } | ||
| else | ||
| { | ||
| val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range); | ||
| } | ||
| } | ||
| else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0) | ||
| { | ||
| val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range); | ||
| } | ||
| else | ||
| { | ||
| val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range); | ||
| } | ||
| } | ||
| else | ||
| { | ||
| tokenCache[scan[c]] = 4; | ||
| if (r.ReadBool(p[4], ref value, ref count, ref range) != 0) | ||
| { | ||
| val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range); | ||
| } | ||
| else | ||
| { | ||
| val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range); | ||
| } | ||
| } | ||
| // Val may use 18-bits | ||
| v = (int)(((long)val * dqv) >> dqShift); /* widened to long so the product cannot overflow 32 bits */ | ||
| } | ||
| else | ||
| { | ||
| if (r.ReadBool(p[1], ref value, ref count, ref range) != 0) | ||
| { | ||
| tokenCache[scan[c]] = 3; | ||
| v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> dqShift; /* magnitude 3 or 4 */ | ||
| } | ||
| else | ||
| { | ||
| tokenCache[scan[c]] = 2; | ||
| v = (2 * dqv) >> dqShift; | ||
| } | ||
| } | ||
| } | ||
| else | ||
| { | ||
| if (!xd.Counts.IsNull) | ||
| { | ||
| ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.OneToken]; | ||
| } | ||
|
|
||
| tokenCache[scan[c]] = 1; | ||
| v = dqv >> dqShift; | ||
| } | ||
| dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd); /* sign bit is read with probability 128 */ | ||
| ++c; | ||
| ctx = GetCoefContext(nb, tokenCache, c); | ||
| dqv = dq[1]; | ||
| } | ||
|
|
||
| r.Value = value; | ||
| r.Range = range; | ||
| r.Count = count; | ||
| return c; | ||
| } | ||
|
|
||
| // Sets the above/left context shifts (8 bits per block) for a transform that extends past | ||
| // xd.MaxBlocksWide / xd.MaxBlocksHigh. A limit of zero disables the check for that direction, | ||
| // and a shift is left untouched when the transform fits. | ||
| private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks) | ||
| { | ||
| if (xd.MaxBlocksWide != 0 && txSizeInBlocks + x > xd.MaxBlocksWide) | ||
| { | ||
| ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8; | ||
| } | ||
|
|
||
| if (xd.MaxBlocksHigh != 0 && txSizeInBlocks + y > xd.MaxBlocksHigh) | ||
| { | ||
| ctxShiftL = (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y)) * 8; | ||
| } | ||
| } | ||
|
|
||
| // Plane 0 maps to plane type 0; every other plane maps to plane type 1. | ||
| private static PlaneType GetPlaneType(int plane) | ||
| { | ||
| int typeIndex = plane > 0 ? 1 : 0; | ||
|
|
||
| return (PlaneType)typeIndex; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Decodes the coefficient tokens of one transform block and updates the above/left | ||
| /// entropy contexts of the plane to record whether the block has any coefficients. | ||
| /// </summary> | ||
| /// <returns>The value returned by DecodeCoefs, or 0 for an invalid transform size</returns> | ||
| public static int DecodeBlockTokens( | ||
| ref TileWorkerData twd, | ||
| int plane, | ||
| Luts.ScanOrder sc, | ||
| int x, | ||
| int y, | ||
| TxSize txSize, | ||
| int segId) | ||
| { | ||
| ref Reader reader = ref twd.BitReader; | ||
| ref MacroBlockD xd = ref twd.Xd; | ||
| ref MacroBlockDPlane pd = ref xd.Plane[plane]; | ||
| ref Array2<short> dequant = ref pd.SegDequant[segId]; | ||
| Span<sbyte> above = pd.AboveContext.ToSpan().Slice(x); | ||
| Span<sbyte> left = pd.LeftContext.ToSpan().Slice(y); | ||
| int shiftAbove = 0; | ||
| int shiftLeft = 0; | ||
| int ctx; | ||
|
|
||
| // Step 1: derive the context from the above/left flags, read at the width the transform covers. | ||
| switch (txSize) | ||
| { | ||
| case TxSize.Tx4x4: | ||
| ctx = above[0] != 0 ? 1 : 0; | ||
| ctx += left[0] != 0 ? 1 : 0; | ||
| break; | ||
| case TxSize.Tx8x8: | ||
| GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx8x8); | ||
| ctx = MemoryMarshal.Cast<sbyte, ushort>(above)[0] != 0 ? 1 : 0; | ||
| ctx += MemoryMarshal.Cast<sbyte, ushort>(left)[0] != 0 ? 1 : 0; | ||
| break; | ||
| case TxSize.Tx16x16: | ||
| GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx16x16); | ||
| ctx = MemoryMarshal.Cast<sbyte, uint>(above)[0] != 0 ? 1 : 0; | ||
| ctx += MemoryMarshal.Cast<sbyte, uint>(left)[0] != 0 ? 1 : 0; | ||
| break; | ||
| case TxSize.Tx32x32: | ||
| GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx32x32); | ||
| // NOTE: Casting to ulong here is safe because the default memory | ||
| // alignment is at least 8 bytes and the Tx32x32 is aligned on 8 byte | ||
| // boundaries. | ||
| ctx = MemoryMarshal.Cast<sbyte, ulong>(above)[0] != 0 ? 1 : 0; | ||
| ctx += MemoryMarshal.Cast<sbyte, ulong>(left)[0] != 0 ? 1 : 0; | ||
| break; | ||
| default: | ||
| Debug.Assert(false, "Invalid transform size."); | ||
| return 0; | ||
| } | ||
|
|
||
| // Step 2: decode the tokens; the call is the same for every transform size. | ||
| int eob = DecodeCoefs( | ||
| ref xd, | ||
| GetPlaneType(plane), | ||
| pd.DqCoeff.ToSpan(), | ||
| txSize, | ||
| ref dequant, | ||
| ctx, | ||
| sc.Scan, | ||
| sc.Neighbors, | ||
| ref reader); | ||
|
|
||
| bool hasCoefs = eob > 0; | ||
|
|
||
| // Step 3: store one flag byte per covered block, shifted by the amounts from GetCtxShift. | ||
| switch (txSize) | ||
| { | ||
| case TxSize.Tx4x4: | ||
| above[0] = left[0] = (sbyte)(hasCoefs ? 1 : 0); | ||
| break; | ||
| case TxSize.Tx8x8: | ||
| MemoryMarshal.Cast<sbyte, ushort>(above)[0] = (ushort)((hasCoefs ? 0x0101 : 0) >> shiftAbove); | ||
| MemoryMarshal.Cast<sbyte, ushort>(left)[0] = (ushort)((hasCoefs ? 0x0101 : 0) >> shiftLeft); | ||
| break; | ||
| case TxSize.Tx16x16: | ||
| MemoryMarshal.Cast<sbyte, uint>(above)[0] = (uint)((hasCoefs ? 0x01010101 : 0) >> shiftAbove); | ||
| MemoryMarshal.Cast<sbyte, uint>(left)[0] = (uint)((hasCoefs ? 0x01010101 : 0) >> shiftLeft); | ||
| break; | ||
| case TxSize.Tx32x32: | ||
| MemoryMarshal.Cast<sbyte, ulong>(above)[0] = (hasCoefs ? 0x0101010101010101UL : 0) >> shiftAbove; | ||
| MemoryMarshal.Cast<sbyte, ulong>(left)[0] = (hasCoefs ? 0x0101010101010101UL : 0) >> shiftLeft; | ||
| break; | ||
| } | ||
|
|
||
| return eob; | ||
| } | ||
| } | ||
| } |
| @@ -0,0 +1,12 @@ | ||
| namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp | ||
| { | ||
| /* Constants declared for the Ryujinx.Graphics.Nvdec.Vp9.Dsp namespace (filter and sub-pixel sizes). */ internal static class Filter | ||
| { | ||
| public const int FilterBits = 7; | ||
|
|
||
| public const int SubpelBits = 4; | ||
| public const int SubpelMask = (1 << SubpelBits) - 1; /* evaluates to 15 */ | ||
| public const int SubpelShifts = 1 << SubpelBits; /* evaluates to 16 */ | ||
| public const int SubpelTaps = 8; | ||
| } | ||
| } |
| @@ -0,0 +1,73 @@ | ||
| using Ryujinx.Graphics.Nvdec.Vp9.Common; | ||
| using System; | ||
| using System.Diagnostics; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp | ||
| { | ||
| /// <summary> | ||
| /// Helpers that turn branch counts into 8-bit probabilities and blend them with previous probabilities. | ||
| /// </summary> | ||
| internal static class Prob | ||
| { | ||
| public const int MaxProb = 255; | ||
|
|
||
| // Computes round(num * 256 / den) and squeezes the result into a byte. | ||
| private static byte GetProb(uint num, uint den) | ||
| { | ||
| Debug.Assert(den != 0); | ||
|
|
||
| ulong scaled = (ulong)num * 256 + (den >> 1); | ||
| int p = (int)(scaled / den); | ||
|
|
||
| // Branch-free form of: (p > 255) ? 255 : (p < 1) ? 1 : p; | ||
| int saturateHigh = (255 - p) >> 23; | ||
| int liftZero = p == 0 ? 1 : 0; | ||
|
|
||
| return (byte)(p | saturateHigh | liftZero); | ||
| } | ||
|
|
||
| /* This function assumes prob1 and prob2 are already within [1,255] range. */ | ||
| public static byte WeightedProb(int prob1, int prob2, int factor) | ||
| { | ||
| int blended = prob1 * (256 - factor) + prob2 * factor; | ||
|
|
||
| return (byte)BitUtils.RoundPowerOfTwo(blended, 8); | ||
| } | ||
|
|
||
| // MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT; | ||
| private static readonly uint[] CountToUpdateFactor = new uint[] | ||
| { | ||
| 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, | ||
| 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 | ||
| }; | ||
|
|
||
| private const int ModeMvCountSat = 20; | ||
|
|
||
| /// <summary> | ||
| /// Blends <paramref name="preProb"/> with the probability implied by the two counts. | ||
| /// The blend weight comes from the total count, saturated at 20; with no counts the previous probability is kept. | ||
| /// </summary> | ||
| public static byte ModeMvMergeProbs(byte preProb, uint ct0, uint ct1) | ||
| { | ||
| uint den = ct0 + ct1; | ||
|
|
||
| if (den == 0) | ||
| { | ||
| return preProb; | ||
| } | ||
|
|
||
| uint saturatedCount = Math.Min(den, ModeMvCountSat); | ||
| int factor = (int)CountToUpdateFactor[(int)saturatedCount]; | ||
| byte observed = GetProb(ct0, den); | ||
|
|
||
| return WeightedProb(preProb, observed, factor); | ||
| } | ||
|
|
||
| // Merges the node at tree index i after its two branches and returns the total count below it. | ||
| // A tree entry <= 0 is a leaf whose count is counts[-entry]; a positive entry is a child node index. | ||
| private static uint TreeMergeProbsImpl( | ||
| uint i, | ||
| sbyte[] tree, | ||
| ReadOnlySpan<byte> preProbs, | ||
| ReadOnlySpan<uint> counts, | ||
| Span<byte> probs) | ||
| { | ||
| int leftEntry = tree[i]; | ||
| uint leftCount; | ||
|
|
||
| if (leftEntry <= 0) | ||
| { | ||
| leftCount = counts[-leftEntry]; | ||
| } | ||
| else | ||
| { | ||
| leftCount = TreeMergeProbsImpl((uint)leftEntry, tree, preProbs, counts, probs); | ||
| } | ||
|
|
||
| int rightEntry = tree[i + 1]; | ||
| uint rightCount; | ||
|
|
||
| if (rightEntry <= 0) | ||
| { | ||
| rightCount = counts[-rightEntry]; | ||
| } | ||
| else | ||
| { | ||
| rightCount = TreeMergeProbsImpl((uint)rightEntry, tree, preProbs, counts, probs); | ||
| } | ||
|
|
||
| int node = (int)(i >> 1); | ||
| probs[node] = ModeMvMergeProbs(preProbs[node], leftCount, rightCount); | ||
|
|
||
| return leftCount + rightCount; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Merges every node probability of <paramref name="tree"/>, starting from the root at index 0. | ||
| /// </summary> | ||
| public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs) | ||
| { | ||
| TreeMergeProbsImpl(0, tree, preProbs, counts, probs); | ||
| } | ||
| } | ||
| } |
| @@ -0,0 +1,237 @@ | ||
| using System; | ||
| using System.Buffers.Binary; | ||
| using Ryujinx.Common.Memory; | ||
|
|
||
| namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp | ||
| { | ||
| internal struct Reader | ||
| { | ||
| /* 256-entry table. Judging by its values, entry i is the number of leading zero bits of the byte i | ||
| (7 for 1, 6 for 2-3, ... 0 for 128-255), with entry 0 set to 0. Its consumer is outside this excerpt - | ||
| presumably the range renormalisation of the bool decoder; confirm. */ private static readonly byte[] Norm = new byte[] | ||
| { | ||
| 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | ||
| 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, | ||
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | ||
| }; | ||
| private const int BdValueSize = sizeof(ulong) * 8; /* width in bits of the Value window (64) */ | ||
|
|
||
| // This is meant to be a large, positive constant that can still be efficiently | ||
| // loaded as an immediate (on platforms like ARM, for example). | ||
| // Even relatively modest values like 100 would work fine. | ||
| private const int LotsOfBits = 0x40000000; | ||
|
|
||
| public ulong Value; /* bit window; Fill ORs new input bytes into it */ | ||
| public uint Range; /* decoder range; Init sets it to 255 */ | ||
| public int Count; /* bit counter; Init sets it to -8 and Fill increases it by the bits it loads */ | ||
| private ArrayPtr<byte> _buffer; /* unread part of the input; Fill advances it past the bytes it consumed */ | ||
|
|
||
| /// <summary> | ||
| /// Points the reader at <paramref name="buffer"/>, resets its state, loads the first bits and reads the marker bit. | ||
| /// </summary> | ||
| /// <returns>True when a non-empty size comes with a null buffer, or when the marker bit is set; false otherwise</returns> | ||
| public bool Init(ArrayPtr<byte> buffer, int size) | ||
| { | ||
| bool missingBuffer = size != 0 && buffer.IsNull; | ||
|
|
||
| if (missingBuffer) | ||
| { | ||
| return true; | ||
| } | ||
|
|
||
| _buffer = new ArrayPtr<byte>(ref buffer[0], size); | ||
| Value = 0; | ||
| Count = -8; | ||
| Range = 255; | ||
| Fill(); | ||
|
|
||
| int markerBit = ReadBit(); // Marker bit | ||
|
|
||
| return markerBit != 0; | ||
| } | ||
|
|
||
| private void Fill() | ||
| { | ||
| ReadOnlySpan<byte> buffer = _buffer.ToSpan(); | ||
| ReadOnlySpan<byte> bufferStart = buffer; | ||
| ulong value = Value; | ||
| int count = Count; | ||
| ulong bytesLeft = (ulong)buffer.Length; | ||
| ulong bitsLeft = bytesLeft * 8; | ||
| int shift = BdValueSize - 8 - (count + 8); | ||
|
|
||
| if (bitsLeft > BdValueSize) | ||
| { | ||
| int bits = (shift & unchecked((int)0xfffffff8)) + 8; | ||
| ulong nv; | ||
| ulong bigEndianValues = BinaryPrimitives.ReadUInt64BigEndian(buffer); | ||
| nv = bigEndianValues >> (BdValueSize - bits); | ||
| count += bits; | ||
| buffer = buffer.Slice(bits >> 3); | ||
| value = Value | (nv << (shift & 0x7)); | ||
| } | ||
| else | ||
| { | ||
| int bitsOver = shift + 8 - (int)bitsLeft; | ||
| int loopEnd = 0; | ||
| if (bitsOver >= 0) | ||
| { | ||
| count += LotsOfBits; | ||
| loopEnd = bitsOver; | ||
| } | ||
|
|
||
| if (bitsOver < 0 || bitsLeft != 0) | ||
| { | ||
| while (shift >= loopEnd) | ||
| { | ||
| count += 8; | ||
| value |= (ulong)buffer[0] << shift; | ||
| buffer = buffer.Slice(1); | ||
| shift -= 8; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // NOTE: Variable 'buffer' may not relate to '_buffer' after decryption, | ||
| // so we increase '_buffer' by the amount that 'buffer' moved, rather than | ||
| // assign 'buffer' to '_buffer'. | ||
| _buffer = _buffer.Slice(bufferStart.Length - buffer.Length); | ||
| Value = value; | ||
| Count = count; | ||
| } | ||
|
|
||
| public bool HasError() | ||
| { | ||
| // Check if we have reached the end of the buffer. | ||
| // | ||
| // Variable 'count' stores the number of bits in the 'value' buffer, minus | ||
| // 8. The top byte is part of the algorithm, and the remainder is buffered | ||
| // to be shifted into it. So if count == 8, the top 16 bits of 'value' are | ||
| // occupied, 8 for the algorithm and 8 in the buffer. | ||
| // | ||
| // When reading a byte from the user's buffer, count is filled with 8 and | ||
| // one byte is filled into the value buffer. When we reach the end of the | ||
| // data, count is additionally filled with LotsOfBits. So when | ||
| // count == LotsOfBits - 1, the user's data has been exhausted. | ||
| // | ||
| // 1 if we have tried to decode bits after the end of stream was encountered. | ||
| // 0 No error. | ||
| return Count > BdValueSize && Count < LotsOfBits; | ||
| } | ||
|
|
||
| public int Read(int prob) | ||
| { | ||
| uint bit = 0; | ||
| ulong value; | ||
| ulong bigsplit; | ||
| int count; | ||
| uint range; | ||
| uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8; | ||
|
|
||
| if (Count < 0) | ||
| { | ||
| Fill(); | ||
| } | ||
|
|
||
| value = Value; | ||
| count = Count; | ||
|
|
||
| bigsplit = (ulong)split << (BdValueSize - 8); | ||
|
|
||
| range = split; | ||
|
|
||
| if (value >= bigsplit) | ||
| { | ||
| range = Range - split; | ||
| value -= bigsplit; | ||
| bit = 1; | ||
| } | ||
|
|
||
| { | ||
| int shift = Norm[range]; | ||
| range <<= shift; | ||
| value <<= shift; | ||
| count -= shift; | ||
| } | ||
| Value = value; | ||
| Count = count; | ||
| Range = range; | ||
|
|
||
| return (int)bit; | ||
| } | ||
|
|
||
| public int ReadBit() | ||
| { | ||
| return Read(128); // vpx_prob_half | ||
| } | ||
|
|
||
| public int ReadLiteral(int bits) | ||
| { | ||
| int literal = 0, bit; | ||
|
|
||
| for (bit = bits - 1; bit >= 0; bit--) | ||
| { | ||
| literal |= ReadBit() << bit; | ||
| } | ||
|
|
||
| return literal; | ||
| } | ||
|
|
||
| public int ReadTree(ReadOnlySpan<sbyte> tree, ReadOnlySpan<byte> probs) | ||
| { | ||
| sbyte i = 0; | ||
|
|
||
| while ((i = tree[i + Read(probs[i >> 1])]) > 0) | ||
| { | ||
| continue; | ||
| } | ||
|
|
||
| return -i; | ||
| } | ||
|
|
||
| public int ReadBool(int prob, ref ulong value, ref int count, ref uint range) | ||
| { | ||
| uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8; | ||
| ulong bigsplit = (ulong)split << (BdValueSize - 8); | ||
|
|
||
| if (count < 0) | ||
| { | ||
| Value = value; | ||
| Count = count; | ||
| Fill(); | ||
| value = Value; | ||
| count = Count; | ||
| } | ||
|
|
||
| if (value >= bigsplit) | ||
| { | ||
| range = range - split; | ||
| value = value - bigsplit; | ||
| { | ||
| int shift = Norm[range]; | ||
| range <<= shift; | ||
| value <<= shift; | ||
| count -= shift; | ||
| } | ||
| return 1; | ||
| } | ||
| range = split; | ||
| { | ||
| int shift = Norm[range]; | ||
| range <<= shift; | ||
| value <<= shift; | ||
| count -= shift; | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| public ArrayPtr<byte> FindEnd() | ||
| { | ||
| // Find the end of the coded buffer | ||
| while (Count > 8 && Count < BdValueSize) | ||
| { | ||
| Count -= 8; | ||
| _buffer = _buffer.Slice(-1); | ||
| } | ||
| return _buffer; | ||
| } | ||
| } | ||
| } |
| @@ -0,0 +1,54 @@ | ||
| namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp | ||
| { | ||
| // Fixed-point constants shared by all idct/dct functions. | ||
| internal static class TxfmCommon | ||
| { | ||
|     // The trigonometric constants below are scaled by 2^DctConstBits (16384). | ||
|     public const int DctConstBits = 14; | ||
|     // Half of one unit at that scale (8192); presumably added before shifting right by | ||
|     // DctConstBits so that the result is rounded to nearest - confirm at the call sites. | ||
|     public const int DctConstRounding = (1 << DctConstBits) >> 1; | ||
|
|
||
|     // Quantizer unit, expressed as a shift (2) and as the matching factor (4). | ||
|     public const int UnitQuantShift = 2; | ||
|     public const int UnitQuantFactor = 1 << UnitQuantShift; | ||
|
|
||
|     // CosPiK_64 = round(16384 * cos(K * Pi / 64)). The table can be regenerated with: | ||
|     // for (int i = 1; i < 32; ++i) | ||
|     //     Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64))); | ||
|     // No separate sine table is needed, since sin(k * Pi / 64) = cos((32 - k) * Pi / 64). | ||
|     public const short CosPi1_64  = 16364; | ||
|     public const short CosPi2_64  = 16305; | ||
|     public const short CosPi3_64  = 16207; | ||
|     public const short CosPi4_64  = 16069; | ||
|     public const short CosPi5_64  = 15893; | ||
|     public const short CosPi6_64  = 15679; | ||
|     public const short CosPi7_64  = 15426; | ||
|     public const short CosPi8_64  = 15137; | ||
|     public const short CosPi9_64  = 14811; | ||
|     public const short CosPi10_64 = 14449; | ||
|     public const short CosPi11_64 = 14053; | ||
|     public const short CosPi12_64 = 13623; | ||
|     public const short CosPi13_64 = 13160; | ||
|     public const short CosPi14_64 = 12665; | ||
|     public const short CosPi15_64 = 12140; | ||
|     public const short CosPi16_64 = 11585; | ||
|     public const short CosPi17_64 = 11003; | ||
|     public const short CosPi18_64 = 10394; | ||
|     public const short CosPi19_64 = 9760; | ||
|     public const short CosPi20_64 = 9102; | ||
|     public const short CosPi21_64 = 8423; | ||
|     public const short CosPi22_64 = 7723; | ||
|     public const short CosPi23_64 = 7005; | ||
|     public const short CosPi24_64 = 6270; | ||
|     public const short CosPi25_64 = 5520; | ||
|     public const short CosPi26_64 = 4756; | ||
|     public const short CosPi27_64 = 3981; | ||
|     public const short CosPi28_64 = 3196; | ||
|     public const short CosPi29_64 = 2404; | ||
|     public const short CosPi30_64 = 1606; | ||
|     public const short CosPi31_64 = 804; | ||
|
|
||
|     // SinPiK_9 = round(16384 * sqrt(2) * sin(K * Pi / 9) * 2 / 3) | ||
|     public const short SinPi1_9 = 5283; | ||
|     public const short SinPi2_9 = 9929; | ||
|     public const short SinPi3_9 = 13377; | ||
|     public const short SinPi4_9 = 15212; | ||
| } | ||
| } |