Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an alternative backend parallel compilation queue for macOS #5291

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Ryujinx.Ava/AppHost.cs
Expand Up @@ -134,7 +134,7 @@ internal class AppHost
_inputManager = inputManager;
_accountManager = accountManager;
_userChannelPersistence = userChannelPersistence;
_renderingThread = new Thread(RenderLoop, 1 * 1024 * 1024) { Name = "GUI.RenderThread" };
_renderingThread = new Thread(RenderLoop, 2 * 1024 * 1024) { Name = "GUI.RenderThread" };
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do the same change in SDL2 headless and GTK for consistency.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably be in a common place, but I'm not sure where to put it. It will be annoying to change 4 files every time we want to change the stack size.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess maybe some helper in the common project to create a thread with a given name and stack size would be nice?

Then we could add other helpers for the common threads that are currently duplicated across all projects, so that they all use the right values?

Copy link
Member Author

@gdkchan gdkchan Jun 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, the render thread should be created by the ThreadedRenderer and the GPU thread should be created on the GpuContext. But it's not that simple, since the OpenGL context is tied to the thread it is created on, and there's also some coupling with the UI and a few places that need to know if the current thread is the "render" thread. That's not something I will change here though, so I guess I will just increase the stack size of the other threads for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the sake of readability, it might be nice to make it a named constant - either the whole stack size or the multiplier

_lastCursorMoveTime = Stopwatch.GetTimestamp();
_glLogLevel = ConfigurationState.Instance.Logger.GraphicsDebugLevel;
_topLevel = topLevel;
Expand Down
29 changes: 23 additions & 6 deletions src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs
Expand Up @@ -53,7 +53,7 @@ public bool IsLinked

private ProgramPipelineState _state;
private DisposableRenderPass _dummyRenderPass;
private Task _compileTask;
private ShaderCompilationRequest _compileRequest;
private bool _firstBackgroundUse;

public ShaderCollection(
Expand Down Expand Up @@ -119,7 +119,7 @@ public bool IsLinked
ClearSegments = BuildClearSegments(resourceLayout.Sets);
BindingSegments = BuildBindingSegments(resourceLayout.SetUsages);

_compileTask = Task.CompletedTask;
_compileRequest = new ShaderCompilationRequest(Task.CompletedTask);
_firstBackgroundUse = false;
}

Expand All @@ -133,7 +133,9 @@ public bool IsLinked
{
_state = state;

_compileTask = BackgroundCompilation();
_compileRequest = gd.ShaderCompilationQueue != null
? gd.ShaderCompilationQueue.Add(BackgroundCompilation)
: new ShaderCompilationRequest(BackgroundCompilationAsync());
_firstBackgroundUse = !fromCache;
}

Expand Down Expand Up @@ -252,10 +254,25 @@ private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollectio
return segments;
}

private async Task BackgroundCompilation()
/// <summary>
/// Asynchronously waits for the compile task of every shader in this collection,
/// then runs <see cref="BackgroundCompilationImpl"/>.
/// </summary>
/// <returns>A task that completes once <see cref="BackgroundCompilationImpl"/> has returned</returns>
private async Task BackgroundCompilationAsync()
{
    // Gather the per-shader compile tasks into a single awaitable.
    Task allShadersCompiled = Task.WhenAll(_shaders.Select(shader => shader.CompileTask));

    await allShadersCompiled;

    BackgroundCompilationImpl();
}

/// <summary>
/// Synchronous counterpart of the asynchronous background compilation: blocks the calling
/// thread until the compile task of each shader has finished, one shader at a time,
/// then runs <see cref="BackgroundCompilationImpl"/>.
/// </summary>
private void BackgroundCompilation()
{
    foreach (var pendingShader in _shaders)
    {
        // Blocking wait; this method has no awaitable result to hand back.
        pendingShader.CompileTask.Wait();
    }

    BackgroundCompilationImpl();
}

private void BackgroundCompilationImpl()
{
if (_shaders.Any(shader => shader.CompileStatus == ProgramLinkStatus.Failure))
{
LinkStatus = ProgramLinkStatus.Failure;
Expand Down Expand Up @@ -397,11 +414,11 @@ public ProgramLinkStatus CheckProgramLink(bool blocking)
}
}

if (!_compileTask.IsCompleted)
if (!_compileRequest.IsCompleted)
{
if (blocking)
{
_compileTask.Wait();
_compileRequest.Wait();

if (LinkStatus == ProgramLinkStatus.Failure)
{
Expand Down
131 changes: 131 additions & 0 deletions src/Ryujinx.Graphics.Vulkan/ShaderCompilationQueue.cs
@@ -0,0 +1,131 @@
using System;
using System.Collections.Concurrent;
using System.Threading;

namespace Ryujinx.Graphics.Vulkan
{
/// <summary>
/// Runs shader compilation callbacks on a fixed set of dedicated background threads,
/// each created with an explicit stack size and fed by its own queue.
/// </summary>
class ShaderCompilationQueue : IDisposable
{
    /// <summary>Number of worker threads (and queues) created by the constructor.</summary>
    private const int MaxParallelCompilations = 8;

    /// <summary>Stack size requested for every worker thread, in bytes (2 MiB).</summary>
    private const int MaxThreadStackSize = 2 * 1024 * 1024;

    /// <summary>
    /// A queued unit of work together with the identifier used to report its completion.
    /// </summary>
    private struct Request
    {
        public readonly ulong Id;
        public readonly Action Callback;

        public Request(ulong id, Action callback)
        {
            Id = id;
            Callback = callback;
        }
    }

    private readonly Thread[] _workerThreads;
    private readonly CancellationTokenSource _cts;
    private readonly BlockingCollection<Request>[] _queues;

    // Per queue: id of the last request whose callback returned. Guarded by lock (_queues[i]).
    private readonly ulong[] _finishedIds;

    private ulong _currentId;
    private int _currentQueueIndex;
    private int _disposed;

    /// <summary>
    /// Creates the queues and starts one background worker thread per queue.
    /// </summary>
    public ShaderCompilationQueue()
    {
        _workerThreads = new Thread[MaxParallelCompilations];
        _queues = new BlockingCollection<Request>[MaxParallelCompilations];
        _finishedIds = new ulong[MaxParallelCompilations];

        _cts = new CancellationTokenSource();

        for (int i = 0; i < MaxParallelCompilations; i++)
        {
            _queues[i] = new BlockingCollection<Request>();

            Thread thread = new Thread(DoWork, MaxThreadStackSize) { Name = $"BackgroundShaderCompiler.{i}" };
            thread.IsBackground = true;
            thread.Start(i);

            _workerThreads[i] = thread;
        }
    }

    /// <summary>
    /// Worker thread entry point: executes the requests of one queue in the order they were
    /// enqueued, publishing each request id once its callback has returned.
    /// </summary>
    /// <param name="threadId">Boxed index of the queue served by this thread</param>
    private void DoWork(object threadId)
    {
        int queueIndex = (int)threadId;

        try
        {
            var queue = _queues[queueIndex];

            foreach (var request in queue.GetConsumingEnumerable(_cts.Token))
            {
                request.Callback();

                lock (queue)
                {
                    _finishedIds[queueIndex] = request.Id;

                    // Wake every thread blocked in Wait() on this queue so it can re-check its id.
                    Monitor.PulseAll(queue);
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Raised when the token is cancelled by Dispose(); the worker simply exits.
        }
    }

    /// <summary>
    /// Enqueues a callback to be executed on one of the worker threads.
    /// </summary>
    /// <param name="callback">Work to execute</param>
    /// <returns>A handle that can be used to poll or wait for the completion of the callback</returns>
    /// <exception cref="ArgumentNullException"><paramref name="callback"/> is null</exception>
    public ShaderCompilationRequest Add(Action callback)
    {
        if (callback == null)
        {
            throw new ArgumentNullException(nameof(callback));
        }

        // Let's keep rotating between the queues to increase the chances
        // that the selected queue thread is currently idle.
        // The counter is reinterpreted as unsigned before taking the remainder: once the int
        // counter wraps to a negative value, a signed remainder would be negative as well and
        // indexing the queue array with it would throw.
        uint ticket = unchecked((uint)Interlocked.Increment(ref _currentQueueIndex));
        int queueIndex = (int)(ticket % MaxParallelCompilations);

        var queue = _queues[queueIndex];
        ulong newId;

        // Allocate the id and enqueue under the queue lock, so that ids within a single queue
        // are enqueued in increasing order. Wait() and IsCompleted() compare against the last
        // finished id of the queue, which is only meaningful if that order holds even when
        // Add() is called from several threads at once.
        lock (queue)
        {
            newId = Interlocked.Increment(ref _currentId);

            queue.Add(new Request(newId, callback));
        }

        return new ShaderCompilationRequest(this, queueIndex, newId);
    }

    /// <summary>
    /// Blocks the calling thread until the request with the given id has finished on the given queue.
    /// </summary>
    /// <param name="queueIndex">Index of the queue the request was added to</param>
    /// <param name="id">Identifier of the request</param>
    public void Wait(int queueIndex, ulong id)
    {
        var queue = _queues[queueIndex];

        lock (queue)
        {
            // Loop because PulseAll wakes all waiters of the queue, not only the one whose request finished.
            while (_finishedIds[queueIndex] < id)
            {
                Monitor.Wait(queue);
            }
        }
    }

    /// <summary>
    /// Checks, without blocking on the work itself, whether a request has finished.
    /// </summary>
    /// <param name="queueIndex">Index of the queue the request was added to</param>
    /// <param name="id">Identifier of the request</param>
    /// <returns>True if the last finished id of the queue is at least <paramref name="id"/>, false otherwise</returns>
    public bool IsCompleted(int queueIndex, ulong id)
    {
        var queue = _queues[queueIndex];

        lock (queue)
        {
            return _finishedIds[queueIndex] >= id;
        }
    }

    /// <summary>
    /// Stops accepting requests, cancels the worker threads, waits for them to exit and
    /// releases the queues. Calling this more than once has no effect.
    /// </summary>
    /// <remarks>
    /// NOTE(review): requests that are still queued when the token is cancelled appear to be
    /// dropped without updating the finished id, so a thread blocked in <see cref="Wait"/> on one
    /// of them would not be released. Confirm that callers never wait during disposal.
    /// </remarks>
    public void Dispose()
    {
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        for (int i = 0; i < MaxParallelCompilations; i++)
        {
            _queues[i].CompleteAdding();
        }

        _cts.Cancel();

        for (int i = 0; i < MaxParallelCompilations; i++)
        {
            _workerThreads[i].Join();

            _queues[i].Dispose();
        }

        _cts.Dispose();
    }
}
}
55 changes: 55 additions & 0 deletions src/Ryujinx.Graphics.Vulkan/ShaderCompilationRequest.cs
@@ -0,0 +1,55 @@
using System.Threading.Tasks;

namespace Ryujinx.Graphics.Vulkan
{
/// <summary>
/// Handle to a pending background shader compilation, backed either by a <see cref="Task"/>
/// or by an entry on a <see cref="ShaderCompilationQueue"/>.
/// </summary>
struct ShaderCompilationRequest
{
    // Set when the request is task-backed, null when it is queue-backed.
    private readonly Task _task;

    // Queue, queue index and request id are only consulted when _task is null.
    private readonly ShaderCompilationQueue _queue;
    private readonly int _queueIndex;
    private readonly ulong _requestId;

    /// <summary>
    /// Indicates whether the underlying task, or the underlying queue entry, has finished.
    /// </summary>
    public bool IsCompleted => _task != null
        ? _task.IsCompleted
        : _queue.IsCompleted(_queueIndex, _requestId);

    /// <summary>
    /// Creates a request backed by a task.
    /// </summary>
    /// <param name="task">Task performing the compilation</param>
    public ShaderCompilationRequest(Task task) : this(task, null, 0, 0)
    {
    }

    /// <summary>
    /// Creates a request backed by an entry on a compilation queue.
    /// </summary>
    /// <param name="queue">Queue the work was added to</param>
    /// <param name="queueIndex">Index of the queue that received the work</param>
    /// <param name="requestId">Identifier assigned to the work by the queue</param>
    public ShaderCompilationRequest(ShaderCompilationQueue queue, int queueIndex, ulong requestId)
        : this(null, queue, queueIndex, requestId)
    {
    }

    // Single place where all fields are assigned; the public constructors forward here.
    private ShaderCompilationRequest(Task task, ShaderCompilationQueue queue, int queueIndex, ulong requestId)
    {
        _task = task;
        _queue = queue;
        _queueIndex = queueIndex;
        _requestId = requestId;
    }

    /// <summary>
    /// Blocks the calling thread until the request has finished.
    /// </summary>
    public void Wait()
    {
        if (_task == null)
        {
            _queue.Wait(_queueIndex, _requestId);

            return;
        }

        _task.Wait();
    }
}
}
9 changes: 9 additions & 0 deletions src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
Expand Up @@ -48,6 +48,7 @@ public sealed class VulkanRenderer : IRenderer
internal DescriptorSetManager DescriptorSetManager { get; private set; }
internal PipelineLayoutCache PipelineLayoutCache { get; private set; }
internal BackgroundResources BackgroundResources { get; private set; }
internal ShaderCompilationQueue ShaderCompilationQueue { get; private set; }
internal Action<Action> InterruptAction { get; private set; }
internal SyncManager SyncManager { get; private set; }

Expand Down Expand Up @@ -104,6 +105,12 @@ public VulkanRenderer(Vk api, Func<Instance, Vk, SurfaceKHR> surfaceFunc, Func<s

// Any device running on MacOS is using MoltenVK, even Intel and AMD vendors.
IsMoltenVk = true;

// The default thread stack size on MacOS is low, and can cause stack overflow
// on SPIR-V Cross during shader compilation.
// As a workaround, we use this custom queue which allows us to specify the stack
// size of the threads used for compilation.
ShaderCompilationQueue = new ShaderCompilationQueue();
}
}

Expand Down Expand Up @@ -859,6 +866,8 @@ public unsafe void Dispose()

SurfaceApi.DestroySurface(_instance.Instance, _surface, null);

ShaderCompilationQueue?.Dispose();

Api.DestroyDevice(_device, null);

_debugMessenger.Dispose();
Expand Down