-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
VKGSRender.h
593 lines (471 loc) · 16.2 KB
/
VKGSRender.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
#pragma once
#include "Emu/RSX/GSRender.h"
#include "Emu/Cell/timers.hpp"
#include "upscalers/upscaling.h"
#include "vkutils/descriptors.h"
#include "vkutils/data_heap.h"
#include "vkutils/instance.hpp"
#include "vkutils/sync.h"
#include "vkutils/swapchain.hpp"
#include "VKTextureCache.h"
#include "VKRenderTargets.h"
#include "VKFormats.h"
#include "VKTextOut.h"
#include "VKOverlays.h"
#include "VKProgramBuffer.h"
#include "VKFramebuffer.h"
#include "VKShaderInterpreter.h"
#include "VKQueryPool.h"
#include "../GCM.h"
#include <thread>
#include <optional>
namespace vk
{
	// Vertex cache specializations for the Vulkan backend, keyed on VkFormat
	using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>;
	using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
	using null_vertex_cache = vertex_cache;

	// Shader cache specialization pairing vulkan pipeline properties with the vulkan program cache
	using shader_cache = rsx::shaders_cache<vk::pipeline_props, vk::program_cache>;

	// Result of uploading a draw call's vertex streams; consumed when emitting the draw command.
	struct vertex_upload_info
	{
		VkPrimitiveTopology primitive;
		u32 vertex_draw_count;         // Vertices referenced by the draw
		u32 allocated_vertex_count;    // Vertices actually allocated in the heap
		u32 first_vertex;
		u32 vertex_index_base;
		u32 vertex_index_offset;
		u32 persistent_window_offset;  // Offset into the persistent attribute stream
		u32 volatile_window_offset;    // Offset into the volatile attribute stream
		std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;  // Present only for indexed draws
	};
}
// Initial heap allocation values. The heaps are growable and will automatically increase in size to accommodate demands
#define VK_ATTRIB_RING_BUFFER_SIZE_M 64
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 64
#define VK_UBO_RING_BUFFER_SIZE_M 16
#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 16
#define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 16
#define VK_INDEX_RING_BUFFER_SIZE_M 16
#define VK_MAX_ASYNC_CB_COUNT 256
#define VK_MAX_ASYNC_FRAMES 2
using rsx::flags32_t;
namespace vk
{
struct buffer_view;
// A single primary command buffer paired with the fence that tracks its GPU completion.
// Instances are pooled and recycled once their fence signals.
struct command_buffer_chunk: public vk::command_buffer
{
	vk::fence* submit_fence = nullptr;   // Signalled when this chunk's submission retires
	VkDevice m_device = VK_NULL_HANDLE;

	atomic_t<bool> pending = { false };  // True while a submission using this chunk is in flight
	u64 eid_tag = 0;                     // Event id to complete once the fence signals
	u64 reset_id = 0;                    // Generation counter; bumped on every reset()
	shared_mutex guard_mutex;

	command_buffer_chunk() = default;

	// Create the completion fence for this chunk on the given device.
	void init_fence(VkDevice dev)
	{
		m_device = dev;
		submit_fence = new vk::fence(dev);
	}

	void destroy()
	{
		vk::command_buffer::destroy();

		delete submit_fence;
		submit_fence = nullptr; // Avoid a dangling pointer (and double-delete) if destroy() is re-entered
	}

	// Associate the chunk with the current global event id so dependent resources
	// can be released when the submission completes.
	void tag()
	{
		eid_tag = vk::get_event_id();
	}

	// Recycle the command buffer for re-recording, draining any in-flight submission first.
	void reset()
	{
		if (pending)
			poke();

		if (pending)
			wait(FRAME_PRESENT_TIMEOUT);

		++reset_id;
		CHECK_RESULT(vkResetCommandBuffer(commands, 0));
	}

	// Non-blocking completion check. Returns true if the chunk is idle (no submission pending).
	bool poke()
	{
		reader_lock lock(guard_mutex);

		if (!pending)
			return true;

		// If the fence was never flushed to the queue, the GPU cannot have signalled it yet
		if (!submit_fence->flushed)
			return false;

		if (vkGetFenceStatus(m_device, submit_fence->handle) == VK_SUCCESS)
		{
			lock.upgrade();

			// Re-check under the writer lock; another thread may have already retired the chunk
			if (pending)
			{
				submit_fence->reset();
				vk::on_event_completed(eid_tag);

				pending = false;
				eid_tag = 0;
			}
		}

		return !pending;
	}

	// Block until the submission retires (or the timeout expires), then mark the chunk idle.
	VkResult wait(u64 timeout = 0ull)
	{
		reader_lock lock(guard_mutex);

		if (!pending)
			return VK_SUCCESS;

		const auto ret = vk::wait_for_fence(submit_fence, timeout);

		lock.upgrade();

		// Re-check under the writer lock; another thread may have already retired the chunk
		if (pending)
		{
			submit_fence->reset();
			vk::on_event_completed(eid_tag);

			pending = false;
			eid_tag = 0;
		}

		return ret;
	}

	// Ensure the pending submission's fence has actually been flushed to the queue.
	void flush()
	{
		reader_lock lock(guard_mutex);

		if (!pending)
			return;

		submit_fence->wait_flush();
	}
};
// Bookkeeping for one occlusion query batch: the query-pool slots it owns and the
// command buffer (and its generation) that must be flushed before results are read.
struct occlusion_data
{
	rsx::simple_array<u32> indices;                          // Query pool slots belonging to this batch
	command_buffer_chunk* command_buffer_to_wait = nullptr;
	u64 command_buffer_sync_id = 0;                          // reset_id captured when the batch was recorded

	// True if 'cmd' is the same command buffer *generation* this batch was recorded into.
	bool is_current(command_buffer_chunk* cmd) const
	{
		return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id);
	}

	void set_sync_command_buffer(command_buffer_chunk* cmd)
	{
		command_buffer_to_wait = cmd;
		command_buffer_sync_id = cmd->reset_id;
	}

	void sync()
	{
		// Guard against a sync request before any command buffer was registered
		if (!command_buffer_to_wait)
		{
			return;
		}

		// Only flush if the command buffer has not been recycled since the batch was recorded
		if (command_buffer_to_wait->reset_id == command_buffer_sync_id)
		{
			// Allocation stack is FIFO and very long so no need to actually wait for fence signal
			command_buffer_to_wait->flush();
		}
	}
};
// Per-frame CPU-side context: presentation semaphores, descriptor allocations and the
// data-heap watermarks recorded when the frame ended.
struct frame_context_t
{
	VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE;  // Signalled on swapchain image acquisition
	VkSemaphore present_wait_semaphore = VK_NULL_HANDLE;    // Waited on before presenting this frame
	vk::descriptor_set descriptor_set;
	vk::descriptor_pool descriptor_pool;
	u32 used_descriptors = 0;

	flags32_t flags = 0;  // frame_context_state bits

	std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;  // Views whose release is deferred to frame cleanup

	u32 present_image = -1;  // Swapchain image index; u32(-1) when no image is acquired
	command_buffer_chunk* swap_command_buffer = nullptr;

	//Heap pointers: offsets into the growable data heaps captured at frame end
	s64 attrib_heap_ptr = 0;
	s64 vtx_env_heap_ptr = 0;
	s64 frag_env_heap_ptr = 0;
	s64 frag_const_heap_ptr = 0;
	s64 vtx_const_heap_ptr = 0;
	s64 vtx_layout_heap_ptr = 0;
	s64 frag_texparam_heap_ptr = 0;
	s64 index_heap_ptr = 0;
	s64 texture_upload_heap_ptr = 0;
	s64 rasterizer_env_heap_ptr = 0;

	u64 last_frame_sync_time = 0;  // System timestamp taken in tag_frame_end()

	//Copy shareable information. Note that buffer_views_to_clean, present_image and
	//swap_command_buffer are deliberately NOT copied; storage is exchanged via swap_storage().
	void grab_resources(frame_context_t &other)
	{
		present_wait_semaphore = other.present_wait_semaphore;
		acquire_signal_semaphore = other.acquire_signal_semaphore;
		descriptor_set.swap(other.descriptor_set);
		descriptor_pool = other.descriptor_pool;
		used_descriptors = other.used_descriptors;
		flags = other.flags;

		attrib_heap_ptr = other.attrib_heap_ptr;
		vtx_env_heap_ptr = other.vtx_env_heap_ptr;
		frag_env_heap_ptr = other.frag_env_heap_ptr;
		vtx_layout_heap_ptr = other.vtx_layout_heap_ptr;
		frag_texparam_heap_ptr = other.frag_texparam_heap_ptr;
		frag_const_heap_ptr = other.frag_const_heap_ptr;
		vtx_const_heap_ptr = other.vtx_const_heap_ptr;
		index_heap_ptr = other.index_heap_ptr;
		texture_upload_heap_ptr = other.texture_upload_heap_ptr;
		rasterizer_env_heap_ptr = other.rasterizer_env_heap_ptr;
	}

	//Exchange storage (non-copyable)
	void swap_storage(frame_context_t &other)
	{
		std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
	}

	// Snapshot the current heap cursors and timestamp the end of the frame.
	void tag_frame_end(
		s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc,
		s64 fragtex_loc, s64 fragconst_loc, s64 vtxconst_loc, s64 index_loc,
		s64 texture_loc, s64 rasterizer_loc)
	{
		attrib_heap_ptr = attrib_loc;
		vtx_env_heap_ptr = vtxenv_loc;
		frag_env_heap_ptr = fragenv_loc;
		vtx_layout_heap_ptr = vtxlayout_loc;
		frag_texparam_heap_ptr = fragtex_loc;
		frag_const_heap_ptr = fragconst_loc;
		vtx_const_heap_ptr = vtxconst_loc;
		index_heap_ptr = index_loc;
		texture_upload_heap_ptr = texture_loc;
		rasterizer_env_heap_ptr = rasterizer_loc;

		last_frame_sync_time = get_system_time();
	}

	// Invalidate the sync timestamp only; the recorded heap pointers are left untouched.
	void reset_heap_ptrs()
	{
		last_frame_sync_time = 0;
	}
};
// Cross-thread flush handshake between requesting threads (producers) and the
// rsx::thread that services the flush (consumer).
struct flush_request_task
{
	atomic_t<bool> pending_state{ false };  //Flush request status; true if rsx::thread is yet to service this request
	atomic_t<int> num_waiters{ 0 };         //Number of threads waiting for this request to be serviced
	bool hard_sync = false;                 // NOTE(review): plain bool written by producers and consumer - presumed protected by external serialization, confirm

	flush_request_task() = default;

	// Producer: register a flush request. hard_sync is sticky until serviced.
	void post(bool _hard_sync)
	{
		hard_sync = (hard_sync || _hard_sync);
		pending_state = true;
		num_waiters++;
	}

	// Producer: withdraw one waiter after its request has been satisfied.
	void remove_one()
	{
		num_waiters--;
	}

	// Consumer: mark the request as serviced.
	void clear_pending_flag()
	{
		hard_sync = false;
		pending_state.store(false);
	}

	bool pending() const
	{
		return pending_state.load();
	}

	// Consumer: spin until every waiter has acknowledged completion.
	void consumer_wait() const
	{
		while (num_waiters.load() != 0)
		{
#if defined(_MSC_VER)
			_mm_pause();
#elif defined(__x86_64__) || defined(__i386__)
			__builtin_ia32_pause();
#else
			// __builtin_ia32_pause() only exists on x86 targets; yield on other architectures
			std::this_thread::yield();
#endif
		}
	}

	// Producer: block until the consumer clears the pending flag.
	void producer_wait() const
	{
		while (pending_state.load())
		{
			std::this_thread::yield();
		}
	}
};
// Describes the guest surface region requested for scan-out/presentation.
struct present_surface_info
{
	u32 address;  // Guest memory address of the surface
	u32 format;   // Raw color format value from the display configuration
	u32 width;    // Dimensions in pixels
	u32 height;
	u32 pitch;    // Row pitch in bytes
};
}
using namespace vk::vmm_allocation_pool_; // clang workaround.
using namespace vk::upscaling_flags_; // ditto
// Vulkan implementation of the RSX renderer backend.
// Owns the instance/device/swapchain, the primary command buffer ring, all growable
// data heaps, and the texture/surface/program caches. Also implements ZCULL_control
// to service occlusion report queries directly.
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:
	// Bit flags for check_heap_status(): selects which growable ring heaps to test for exhaustion
	enum
	{
		VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE = 0x1,
		VK_HEAP_CHECK_VERTEX_STORAGE = 0x2,
		VK_HEAP_CHECK_VERTEX_ENV_STORAGE = 0x4,
		VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE = 0x8,
		VK_HEAP_CHECK_TEXTURE_ENV_STORAGE = 0x10,
		VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE = 0x20,
		VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE = 0x40,
		VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE = 0x80,

		VK_HEAP_CHECK_MAX_ENUM = VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE,
		VK_HEAP_CHECK_ALL = 0xFF,
	};

	// Per-frame-context state flags (stored in vk::frame_context_t::flags)
	enum frame_context_state : u32
	{
		dirty = 1
	};

	// States for the offloader flush queue / deadlock recovery (see m_queue_status)
	enum flush_queue_state : u32
	{
		ok = 0,
		flushing = 1,
		deadlock = 2
	};

private:
	VKFragmentProgram m_fragment_prog;
	VKVertexProgram m_vertex_prog;

	vk::glsl::program *m_program = nullptr;  // Currently bound program
	vk::pipeline_props m_pipeline_properties;

	vk::texture_cache m_texture_cache;
	vk::surface_cache m_rtts;  // Render target cache

	// Dummy buffer/view bound to unused slots
	std::unique_ptr<vk::buffer> null_buffer;
	std::unique_ptr<vk::buffer_view> null_buffer_view;

	std::unique_ptr<vk::text_writer> m_text_writer;  // On-screen debug text output
	std::unique_ptr<vk::upscaler> m_upscaler;
	bool m_use_fsr_upscaling{false};

	// Conditional rendering predicate storage
	std::unique_ptr<vk::buffer> m_cond_render_buffer;
	u64 m_cond_render_sync_tag = 0;

	// Sampler/texture binding state
	shared_mutex m_sampler_mutex;
	atomic_t<bool> m_samplers_dirty = { true };
	std::unique_ptr<vk::sampler> m_stencil_mirror_sampler;
	std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
	std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {};
	std::array<vk::sampler*, rsx::limits::fragment_textures_count> fs_sampler_handles{};
	std::array<vk::sampler*, rsx::limits::vertex_textures_count> vs_sampler_handles{};

	// Views over the attribute streams consumed by the vertex stage
	std::unique_ptr<vk::buffer_view> m_persistent_attribute_storage;
	std::unique_ptr<vk::buffer_view> m_volatile_attribute_storage;
	std::unique_ptr<vk::buffer_view> m_vertex_layout_storage;

public:
	//vk::fbo draw_fbo;
	std::unique_ptr<vk::vertex_cache> m_vertex_cache;
	std::unique_ptr<vk::shader_cache> m_shaders_cache;

private:
	std::unique_ptr<vk::program_cache> m_prog_buffer;

	std::unique_ptr<vk::swapchain_base> m_swapchain;
	vk::instance m_instance;
	vk::render_device *m_device;

	//Vulkan internals
	vk::command_pool m_command_buffer_pool;
	std::unique_ptr<vk::query_pool_manager> m_occlusion_query_manager;
	bool m_occlusion_query_active = false;
	rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
	std::vector<vk::occlusion_data> m_occlusion_map;

	shared_mutex m_secondary_cb_guard;
	vk::command_pool m_secondary_command_buffer_pool;
	vk::command_buffer m_secondary_command_buffer;  //command buffer used for setup operations

	// Ring of primary command buffers; m_current_command_buffer points into m_primary_cb_list
	u32 m_current_cb_index = 0;
	std::array<vk::command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
	vk::command_buffer_chunk* m_current_command_buffer = nullptr;

	VkDescriptorSetLayout descriptor_layouts;
	VkPipelineLayout pipeline_layout;

	vk::framebuffer_holder* m_draw_fbo = nullptr;

	sizeu m_swapchain_dims{};
	bool swapchain_unavailable = false;
	bool should_reinitialize_swapchain = false;

	u64 m_last_heap_sync_time = 0;
	u32 m_texbuffer_view_size = 0;

	// Growable ring heaps backing per-draw GPU data
	vk::data_heap m_attrib_ring_info;                  // Vertex data
	vk::data_heap m_fragment_constants_ring_info;      // Fragment program constants
	vk::data_heap m_transform_constants_ring_info;     // Transform program constants
	vk::data_heap m_fragment_env_ring_info;            // Fragment environment params
	vk::data_heap m_vertex_env_ring_info;              // Vertex environment params
	vk::data_heap m_fragment_texture_params_ring_info; // Fragment texture params
	vk::data_heap m_vertex_layout_ring_info;           // Vertex layout structure
	vk::data_heap m_index_buffer_ring_info;            // Index data
	vk::data_heap m_texture_upload_buffer_ring_info;   // Texture upload heap
	vk::data_heap m_raster_env_ring_info;              // Raster control such as polygon and line stipple

	// Instruction streams consumed by the shader interpreter
	vk::data_heap m_fragment_instructions_buffer;
	vk::data_heap m_vertex_instructions_buffer;

	// Cached descriptor ranges pointing into the heaps above
	VkDescriptorBufferInfo m_vertex_env_buffer_info;
	VkDescriptorBufferInfo m_fragment_env_buffer_info;
	VkDescriptorBufferInfo m_vertex_layout_stream_info;
	VkDescriptorBufferInfo m_vertex_constants_buffer_info;
	VkDescriptorBufferInfo m_fragment_constants_buffer_info;
	VkDescriptorBufferInfo m_fragment_texture_params_buffer_info;
	VkDescriptorBufferInfo m_raster_env_buffer_info;
	VkDescriptorBufferInfo m_vertex_instructions_buffer_info;
	VkDescriptorBufferInfo m_fragment_instructions_buffer_info;

	std::array<vk::frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
	//Temp frame context to use if the real frame queue is overburdened. Only used for storage
	vk::frame_context_t m_aux_frame_context;

	u32 m_current_queue_index = 0;
	vk::frame_context_t* m_current_frame = nullptr;
	std::deque<vk::frame_context_t*> m_queued_frames;

	VkViewport m_viewport{};
	VkRect2D m_scissor{};

	std::vector<u8> m_draw_buffers;

	shared_mutex m_flush_queue_mutex;
	vk::flush_request_task m_flush_requests;

	// Offloader thread deadlock recovery
	rsx::atomic_bitmask_t<flush_queue_state> m_queue_status;
	utils::address_range m_offloader_fault_range;
	rsx::invalidation_cause m_offloader_fault_cause;

	u32 m_current_subdraw_id = 0;

	// Render pass lookup cache keyed on the current framebuffer configuration
	u64 m_current_renderpass_key = 0;
	VkRenderPass m_cached_renderpass = VK_NULL_HANDLE;
	std::vector<vk::image*> m_fbo_images;

	//Vertex layout
	rsx::vertex_input_layout m_vertex_layout;

	vk::shader_interpreter m_shader_interpreter;
	u32 m_interpreter_state;

#if defined(HAVE_X11) && defined(HAVE_VULKAN)
	Display *m_display_handle = nullptr;
#endif

public:
	u64 get_cycles() final;

	VKGSRender();
	~VKGSRender() override;

private:
	void prepare_rtts(rsx::framebuffer_creation_context context);

	// Command buffer lifecycle
	void open_command_buffer();
	void close_and_submit_command_buffer(
		vk::fence* fence = nullptr,
		VkSemaphore wait_semaphore = VK_NULL_HANDLE,
		VkSemaphore signal_semaphore = VK_NULL_HANDLE,
		VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
	void flush_command_queue(bool hard_sync = false, bool do_not_switch = false);

	// Presentation
	void queue_swap_request();
	void frame_context_cleanup(vk::frame_context_t *ctx, bool free_resources = false);
	void advance_queued_frames();
	void present(vk::frame_context_t *ctx);
	void reinitialize_swapchain();
	vk::viewable_image* get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig);

	// Render pass management
	void begin_render_pass();
	void close_render_pass();
	VkRenderPass get_render_pass();

	void update_draw_state();

	void check_heap_status(u32 flags = VK_HEAP_CHECK_ALL);
	void check_present_status();

	void check_descriptors();
	VkDescriptorSet allocate_descriptor_set();

	vk::vertex_upload_info upload_vertex_data();

	// Program/pipeline state upload
	bool load_program();
	void load_program_env();
	void update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info);

	// Texture state upload/binding
	void load_texture_env();
	bool bind_texture_env();
	bool bind_interpreter_texture_env();

public:
	void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
	void set_viewport();
	void set_scissor(bool clip_viewport);
	void bind_viewport();

	void sync_hint(rsx::FIFO_hint hint, void* args) override;

	// ZCULL_control occlusion query interface
	void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
	void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
	bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
	void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
	void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;

	// External callback in case we need to suddenly submit a commandlist unexpectedly, e.g in a violation handler
	void emergency_query_cleanup(vk::command_buffer* commands);

	// External callback to handle out of video memory problems
	bool on_vram_exhausted(rsx::problem_severity severity);

	// Conditional rendering
	void begin_conditional_rendering(const std::vector<rsx::reports::occlusion_query_info*>& sources) override;
	void end_conditional_rendering() override;

protected:
	void clear_surface(u32 mask) override;
	void begin() override;
	void end() override;
	void emit_geometry(u32 sub_index) override;

	void on_init_thread() override;
	void on_exit() override;
	void flip(const rsx::display_flip_info_t& info) override;

	void renderctl(u32 request_code, void* args) override;

	void do_local_task(rsx::FIFO_state state) override;

	bool scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) override;
	void notify_tile_unbound(u32 tile) override;

	// Memory invalidation / access violation handling (may be called from fault handlers)
	bool on_access_violation(u32 address, bool is_writing) override;
	void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override;
	void on_semaphore_acquire_wait() override;
};