-
Notifications
You must be signed in to change notification settings - Fork 70
/
vk_cmdbuffer.h
1371 lines (1143 loc) · 55.8 KB
/
vk_cmdbuffer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
**************************************************************************************************
* @file vk_cmdbuffer.h
* @brief Declaration of the Vulkan command buffer class and its inline helper methods.
**************************************************************************************************
*/
#ifndef __VK_CMDBUFFER_H__
#define __VK_CMDBUFFER_H__
#pragma once
#include "include/khronos/vulkan.h"
#include "include/vk_cmd_pool.h"
#include "include/vk_event.h"
#include "include/vk_dispatch.h"
#include "include/vk_pipeline_layout.h"
#include "include/vk_render_pass.h"
#include "include/vk_utils.h"
#include "include/gpu_event_mgr.h"
#include "include/internal_mem_mgr.h"
#include "include/stencil_ops_combiner.h"
#include "include/vert_buf_binding_mgr.h"
#include "include/virtual_stack_mgr.h"
#include "renderpass/renderpass_builder.h"
#include "palCmdBuffer.h"
#include "palDequeImpl.h"
#include "palGpuMemory.h"
#include "palLinearAllocator.h"
#include "palPipeline.h"
#include "palQueue.h"
// Forward declarations of the PAL types that this header only refers to by pointer or reference.
namespace Pal
{

struct BarrierInfo;
struct CmdBufferCreateInfo;
class  ICmdBuffer;
class  IGpuEvent;
struct ImageLayout;

} // namespace Pal
namespace vk
{
// Forward declarations of driver-side classes that this header only names through pointers and references.

// Device-level objects
class Device;
class Queue;
class DispatchableCmdBuffer;

// Pipelines
class GraphicsPipeline;
class ComputePipeline;

// Render pass related objects
class RenderPass;
class Framebuffer;
class Image;

// Queries and thread-trace annotation state
class TimestampQueryPool;
class SqttCmdBufferState;
// =====================================================================================================================
// Represents an internal GPU allocation owned by a Vulkan command buffer. Can contain things like internal descriptor
// set data and other non-PM4 related data.
//
// Instances form a singly-linked list through pNext.
// NOTE(review): nothing in this header shows who walks or frees the list -- confirm ownership in the implementation.
struct CmdBufGpuMem
{
InternalMemory internalMem; // Internal memory allocation
InternalMemCreateInfo info; // Information about this allocation
CmdBufGpuMem* pNext; // Intrusive list pointer to the next command buffer GPU memory object.
};
// Upper bounds, in 32-bit user data registers, on the binding data tracked per pipeline bind point.

// Registers reserved for descriptor set pointers across every descriptor set.
constexpr uint32_t MaxDescSetRegCount   = PipelineLayout::SetPtrRegCount * MaxDescriptorSets;

// Registers reserved for dynamic descriptors across every dynamic descriptor.
constexpr uint32_t MaxDynDescRegCount   = PipelineLayout::DynDescRegCount * MaxDynamicDescriptors;

// Combined set-pointer and dynamic-descriptor budget; sizes PerGpuRenderState::setBindingData.
constexpr uint32_t MaxBindingRegCount   = MaxDynDescRegCount + MaxDescSetRegCount;

// Push constant storage expressed in 32-bit registers; sizes PipelineBindState::pushConstData.
// NOTE(review): assumes MaxPushConstants is a byte count -- confirm against its definition.
constexpr uint32_t MaxPushConstRegCount = MaxPushConstants / static_cast<uint32_t>(sizeof(uint32_t));
// This structure contains information about currently written user data entries within the command buffer.
// AllGpuRenderState holds one instance of it per PAL pipeline bind point.
struct PipelineBindState
{
// Cached copy of the user data layout from the current pipeline's layout
PipelineLayout::UserDataLayout userDataLayout;
// Current pipeline's layout
const PipelineLayout* pLayout;
// Currently pushed constant values (relative to a base of 0), stored as 32-bit registers
uint32_t pushConstData[MaxPushConstRegCount];
// High-water mark of the largest number of bound sets
uint32_t boundSetCount;
// High-water mark of the largest number of pushed constants
uint32_t pushedConstCount;
};
// Members of CmdBufferRenderState that are different for each GPU
struct PerGpuRenderState
{
// Currently bound descriptor sets and dynamic offsets (relative to a base of 0), one row per PAL pipeline bind point
uint32_t setBindingData[static_cast<uint32_t>(Pal::PipelineBindPoint::Count)][MaxBindingRegCount];
// Most recently bound PAL state objects for this GPU. CmdBuffer::PalCmdBindMsaaState(), PalCmdBindColorBlendState()
// and PalCmdBindDepthStencilState() compare against these pointers and skip the PAL bind when nothing changed.
const Pal::IMsaaState* pMsaaState;
const Pal::IColorBlendState* pColorBlendState;
const Pal::IDepthStencilState* pDepthStencilState;
};
// Members of CmdBufferRenderState that are the same for each GPU
struct AllGpuRenderState
{
// Pipelines tracked for the graphics and compute bind points
// NOTE(review): presumably the most recently bound pipelines -- confirm in CmdBuffer::BindPipeline.
const GraphicsPipeline* pGraphicsPipeline;
const ComputePipeline* pComputePipeline;
// Render pass tracked by the command buffer; CmdBuffer::RPGetAttachmentLayout() and RPSetAttachmentLayout() read its
// attachment count to validate attachment indices.
const RenderPass* pRenderPass;
const Framebuffer* pFramebuffer;
// NOTE(review): looks like the MSAA states used by PreBltBindMsaaState()/PostBltRestoreMsaaState() -- confirm.
const Pal::IMsaaState* const * pBltMsaaStates;
// User data bind state, one entry per PAL pipeline bind point
PipelineBindState pipelineState[static_cast<uint32_t>(Pal::PipelineBindPoint::Count)];
Pal::ViewportParams viewport;
Pal::ScissorRectParams scissor;
Pal::DynamicGraphicsShaderInfos graphicsShaderInfo;
Pal::DynamicComputeShaderInfo computeShaderInfo;
// These tokens describe the current "static" values of pieces of Vulkan render state. These are set by pipelines
// that program static render state, and are reset to DynamicRenderStateToken by vkCmdSet* functions.
//
// Command buffer recording can compare these tokens with new incoming tokens to efficiently redundancy check
// render state and avoid context rolling. This redundancy checking is only done for static pipeline state and not
// for vkCmdSet* function values.
//
// CmdBuffer::IsStaticStateDifferent() implements the comparison.
struct
{
uint32_t inputAssemblyState;
uint32_t triangleRasterState;
uint32_t pointLineRasterState;
uint32_t depthBiasState;
uint32_t blendConst;
uint32_t depthBounds;
uint32_t viewports;
uint32_t scissorRect;
uint32_t samplePattern;
} staticTokens;
};
// This structure describes current render state within a command buffer during its building.
struct CmdBufferRenderState
{
// State shared by every GPU
AllGpuRenderState allGpuState;
// State kept separately per GPU; CmdBuffer indexes this array with a device index
PerGpuRenderState perGpuState[MaxPalDevices];
};
// State tracked during a render pass instance when building a command buffer.
struct RenderPassInstanceState
{
    // Per-attachment instance state
    struct AttachmentState
    {
        // Current per-aspect (color, depth, stencil) PAL layout. CmdBuffer::RPGetAttachmentLayout() and
        // RPSetAttachmentLayout() index this array with the numeric value of Pal::ImageAspect.
        Pal::ImageLayout aspectLayout[3];
        VkClearValue     clearValue;            // Specified load-op clear value for this attachment
        SamplePattern    initialSamplePattern;  // Initial sample pattern at first layout transition of
                                                // depth/stencil attachment.
    };

    // Instance-wide flag bits; u32All aliases the whole bitfield.
    union
    {
        struct
        {
            uint32_t samplePatternValid : 1;
            uint32_t reserved           : 31;
        };
        uint32_t u32All;
    } flags;

    // explicit: a bare PalAllocator* must never convert silently into a render pass instance state object.
    explicit RenderPassInstanceState(PalAllocator* pAllocator);

    const RenderPassExecuteInfo* pExecuteInfo;
    uint32_t                     subpass;
    uint32_t                     renderAreaCount;
    Pal::Rect                    renderArea[MaxPalDevices];

    // Entries available in pAttachments; the CmdBuffer attachment-layout accessors assert that the attachment index
    // stays below this value.
    size_t                       maxAttachmentCount;
    AttachmentState*             pAttachments;

    // NOTE(review): presumably the number of entries available in pSamplePatterns -- confirm in the implementation.
    size_t                       maxSubpassCount;
    SamplePattern*               pSamplePatterns;
};
// =====================================================================================================================
// A Vulkan command buffer.
//
// Declares the command recording entry points, the PalCmd* helpers, render pass helpers and the state that is tracked
// while a command buffer is built. Except for the small inline methods defined here and directly below the class, the
// member functions are implemented outside this header.
//
// The constructor is private; Create() is the public allocation entry point. Copying is disallowed because
// PalCmdBuffer() expects the device-0 PAL command buffer to live in memory placed directly after this object.
class CmdBuffer
{
public:
    typedef VkCommandBuffer ApiType;

    static VkResult Create(
        Device* pDevice,
        const VkCommandBufferAllocateInfo* pAllocateInfo,
        VkCommandBuffer* pCommandBuffers);

    // ---- Command buffer lifetime -----------------------------------------------------------------------------------
    VkResult Begin(
        const VkCommandBufferBeginInfo* pBeginInfo);

    VkResult Reset(VkCommandBufferResetFlags flags);

    VkResult End(void);

    // ---- Binding, draw and dispatch commands -----------------------------------------------------------------------
    void BindPipeline(
        VkPipelineBindPoint pipelineBindPoint,
        VkPipeline pipeline);

    void ExecuteCommands(
        uint32_t cmdBufferCount,
        const VkCommandBuffer* pCmdBuffers);

    void BindDescriptorSets(
        VkPipelineBindPoint pipelineBindPoint,
        VkPipelineLayout layout,
        uint32_t firstSet,
        uint32_t setCount,
        const VkDescriptorSet* pDescriptorSets,
        uint32_t dynamicOffsetCount,
        const uint32_t* pDynamicOffsets);

    void BindIndexBuffer(
        VkBuffer buffer,
        VkDeviceSize offset,
        VkIndexType indexType);

    void BindVertexBuffers(
        uint32_t firstBinding,
        uint32_t bindingCount,
        const VkBuffer* pBuffers,
        const VkDeviceSize* pOffsets);

    void Draw(
        uint32_t firstVertex,
        uint32_t vertexCount,
        uint32_t firstInstance,
        uint32_t instanceCount);

    void DrawIndexed(
        uint32_t firstIndex,
        uint32_t indexCount,
        int32_t vertexOffset,
        uint32_t firstInstance,
        uint32_t instanceCount);

    template< bool indexed, bool useBufferCount>
    void DrawIndirect(
        VkBuffer buffer,
        VkDeviceSize offset,
        uint32_t count,
        uint32_t stride,
        VkBuffer countBuffer,
        VkDeviceSize countOffset);

    void Dispatch(
        uint32_t x,
        uint32_t y,
        uint32_t z);

    void DispatchOffset(
        uint32_t base_x,
        uint32_t base_y,
        uint32_t base_z,
        uint32_t dim_x,
        uint32_t dim_y,
        uint32_t dim_z);

    void DispatchIndirect(
        VkBuffer buffer,
        VkDeviceSize offset);

    // ---- Copy, blit, update, fill, clear and resolve commands ------------------------------------------------------
    void CopyBuffer(
        VkBuffer srcBuffer,
        VkBuffer destBuffer,
        uint32_t regionCount,
        const VkBufferCopy* pRegions);

    void CopyImage(
        VkImage srcImage,
        VkImageLayout srcImageLayout,
        VkImage destImage,
        VkImageLayout destImageLayout,
        uint32_t regionCount,
        const VkImageCopy* pRegions);

    void BlitImage(
        VkImage srcImage,
        VkImageLayout srcImageLayout,
        VkImage destImage,
        VkImageLayout destImageLayout,
        uint32_t regionCount,
        const VkImageBlit* pRegions,
        VkFilter filter);

    void CopyBufferToImage(
        VkBuffer srcBuffer,
        VkImage destImage,
        VkImageLayout destImageLayout,
        uint32_t regionCount,
        const VkBufferImageCopy* pRegions);

    void CopyImageToBuffer(
        VkImage srcImage,
        VkImageLayout srcImageLayout,
        VkBuffer destBuffer,
        uint32_t regionCount,
        const VkBufferImageCopy* pRegions);

    void UpdateBuffer(
        VkBuffer destBuffer,
        VkDeviceSize destOffset,
        VkDeviceSize dataSize,
        const uint32_t* pData);

    void FillBuffer(
        VkBuffer destBuffer,
        VkDeviceSize destOffset,
        VkDeviceSize fillSize,
        uint32_t data);

    void ClearColorImage(
        VkImage image,
        VkImageLayout imageLayout,
        const VkClearColorValue* pColor,
        uint32_t rangeCount,
        const VkImageSubresourceRange* pRanges);

    void ClearDepthStencilImage(
        VkImage image,
        VkImageLayout imageLayout,
        float depth,
        uint32_t stencil,
        uint32_t rangeCount,
        const VkImageSubresourceRange* pRanges);

    void ClearAttachments(
        uint32_t attachmentCount,
        const VkClearAttachment* pAttachments,
        uint32_t rectCount,
        const VkClearRect* pRects);

    void ClearImageAttachments(
        uint32_t attachmentCount,
        const VkClearAttachment* pAttachments,
        uint32_t rectCount,
        const VkClearRect* pRects);

    void ClearBoundAttachments(
        uint32_t attachmentCount,
        const VkClearAttachment* pAttachments,
        uint32_t rectCount,
        const VkClearRect* pRects);

    void ResolveImage(
        VkImage srcImage,
        VkImageLayout srcImageLayout,
        VkImage destImage,
        VkImageLayout destImageLayout,
        uint32_t rectCount,
        const VkImageResolve* pRects);

    // ---- Render state setters --------------------------------------------------------------------------------------
    void SetViewport(
        uint32_t firstViewport,
        uint32_t viewportCount,
        const VkViewport* pViewports);

    void SetAllViewports(
        const Pal::ViewportParams& params,
        uint32_t staticToken);

    void SetScissor(
        uint32_t firstScissor,
        uint32_t scissorCount,
        const VkRect2D* pScissors);

    void SetAllScissors(
        const Pal::ScissorRectParams& params,
        uint32_t staticToken);

    void SetLineWidth(
        float lineWidth);

    void SetDepthBias(
        float depthBias,
        float depthBiasClamp,
        float slopeScaledDepthBias);

    void SetBlendConstants(
        const float blendConst[4]);

    void SetDepthBounds(
        float minDepthBounds,
        float maxDepthBounds);

    void SetStencilCompareMask(
        VkStencilFaceFlags faceMask,
        uint32_t stencilCompareMask);

    void SetStencilWriteMask(
        VkStencilFaceFlags faceMask,
        uint32_t stencilWriteMask);

    void SetStencilReference(
        VkStencilFaceFlags faceMask,
        uint32_t stencilReference);

    // ---- Events and barriers ---------------------------------------------------------------------------------------
    void SetEvent(
        VkEvent event,
        VkPipelineStageFlags stageMask);

    void ResetEvent(
        VkEvent event,
        VkPipelineStageFlags stageMask);

    void WaitEvents(
        uint32_t eventCount,
        const VkEvent* pEvents,
        VkPipelineStageFlags srcStageMask,
        VkPipelineStageFlags dstStageMask,
        uint32_t memoryBarrierCount,
        const VkMemoryBarrier* pMemoryBarriers,
        uint32_t bufferMemoryBarrierCount,
        const VkBufferMemoryBarrier* pBufferMemoryBarriers,
        uint32_t imageMemoryBarrierCount,
        const VkImageMemoryBarrier* pImageMemoryBarriers);

    void PipelineBarrier(
        VkPipelineStageFlags srcStageMask,
        VkPipelineStageFlags dstStageMask,
        uint32_t memoryBarrierCount,
        const VkMemoryBarrier* pMemoryBarriers,
        uint32_t bufferMemoryBarrierCount,
        const VkBufferMemoryBarrier* pBufferMemoryBarriers,
        uint32_t imageMemoryBarrierCount,
        const VkImageMemoryBarrier* pImageMemoryBarriers);

    // ---- Queries ---------------------------------------------------------------------------------------------------
    void BeginQuery(
        VkQueryPool queryPool,
        uint32_t query,
        VkQueryControlFlags flags);

    void EndQuery(
        VkQueryPool queryPool,
        uint32_t query);

    void ResetQueryPool(
        VkQueryPool queryPool,
        uint32_t firstQuery,
        uint32_t queryCount);

    void CopyQueryPoolResults(
        VkQueryPool queryPool,
        uint32_t firstQuery,
        uint32_t queryCount,
        VkBuffer destBuffer,
        VkDeviceSize destOffset,
        VkDeviceSize destStride,
        VkQueryResultFlags flags);

    void WriteTimestamp(
        VkPipelineStageFlagBits pipelineStage,
        const TimestampQueryPool* pQueryPool,
        uint32_t query);

    void SetSampleLocations(
        const VkSampleLocationsInfoEXT* pSampleLocationsInfo);

    // ---- Render pass control ---------------------------------------------------------------------------------------
    void BeginRenderPass(
        const VkRenderPassBeginInfo* pRenderPassBegin,
        VkSubpassContents contents);

    void NextSubPass(VkSubpassContents contents);

    void EndRenderPass();

    void PushConstants(
        VkPipelineLayout layout,
        VkShaderStageFlags stageFlags,
        uint32_t start,
        uint32_t length,
        const void* values);

    void WriteBufferMarker(
        VkPipelineStageFlagBits pipelineStage,
        VkBuffer dstBuffer,
        VkDeviceSize dstOffset,
        uint32_t marker);

    // ---- Device mask handling --------------------------------------------------------------------------------------

    // Selects the devices subsequent commands apply to. The mask must be a subset of the device's PAL device mask and
    // of the mask of devices already in use by this command buffer (both checked by assertion only).
    VK_INLINE void SetDeviceMask(uint32_t deviceMask)
    {
        // Ensure we are enabling valid devices within the group
        VK_ASSERT((m_pDevice->GetPalDeviceMask() & deviceMask) == deviceMask);

        // Ensure disabled devices are not enabled during recording
        VK_ASSERT(((m_palDeviceUsedMask ^ deviceMask) & deviceMask) == 0);

        m_palDeviceMask = deviceMask;
    }

    VK_INLINE uint32_t GetDeviceMask() const
    {
        return m_palDeviceMask;
    }

    VK_INLINE uint32_t GetDeviceUsedMask() const
    {
        return m_palDeviceUsedMask;
    }

    VkResult Destroy(void);

    VK_FORCEINLINE Device* VkDevice(void) const
        { return m_pDevice; }

    VK_FORCEINLINE Instance* VkInstance(void) const
        { return m_pDevice->VkInstance(); }

    // Returns the PAL command buffer for the given device index.
    //
    // Index 0 is answered without reading m_pPalCmdBuffers: the device-0 PAL command buffer is expected to sit in
    // memory directly after this object, which the assertion below cross-checks against the stored pointer.
    VK_INLINE Pal::ICmdBuffer* PalCmdBuffer(
        int32_t idx = DefaultDeviceIndex) const
    {
        // Validate the index before it is used to subscript m_pPalCmdBuffers on either path.
        VK_ASSERT((idx >= 0) && (idx < static_cast<int32_t>(MaxPalDevices)));

        if (idx == 0)
        {
            Pal::ICmdBuffer* const pFirstCmdBuffer =
                reinterpret_cast<Pal::ICmdBuffer*>(reinterpret_cast<uintptr_t>(this) + sizeof(*this));

            VK_ASSERT(m_pPalCmdBuffers[idx] == pFirstCmdBuffer);

            return pFirstCmdBuffer;
        }

        return m_pPalCmdBuffers[idx];
    }

    // ---- Barrier flag conversion helpers ---------------------------------------------------------------------------
    static Pal::uint32 ConvertBarrierSrcAccessFlags(const Device* pDevice, VkAccessFlags accessMask);
    static Pal::uint32 ConvertBarrierDstAccessFlags(const Device* pDevice, VkAccessFlags accessMask);

    static void ConvertBarrierCacheFlags(
        const Device* pDevice,
        VkAccessFlags srcAccess,
        VkAccessFlags dstAccess,
        uint32_t supportInputCacheMask,
        uint32_t supportOutputCacheMask,
        uint32_t barrierOptions,
        Pal::BarrierTransition* pResult);

    // ---- Simple accessors ------------------------------------------------------------------------------------------
    VK_INLINE uint32_t GetQueueFamilyIndex() const { return m_queueFamilyIndex; }
    VK_INLINE Pal::QueueType GetPalQueueType() const { return m_palQueueType; }
    VK_INLINE Pal::EngineType GetPalEngineType() const { return m_palEngineType; }

    VK_INLINE VirtualStackAllocator* GetStackAllocator() { return m_pStackAllocator; }

    void RequestRenderPassEvents(uint32_t eventCount, GpuEvents*** pppGpuEvents);

    // ---- PalCmd* helpers -------------------------------------------------------------------------------------------
    void PalCmdBarrier(
        const Pal::BarrierInfo& info);

    void PalCmdBarrier(
        Pal::BarrierInfo* pInfo,
        Pal::BarrierTransition* const pTransitions,
        const Image** const pTransitionImages);

    Pal::Result PalCmdBufferBegin(
        const Pal::CmdBufferBuildInfo& cmdInfo);

    Pal::Result PalCmdBufferEnd();
    Pal::Result PalCmdBufferReset(Pal::ICmdAllocator* pCmdAllocator, bool returnGpuMemory);

    void PalCmdBufferDestroy();

    void PalCmdBindIndexData(
        Buffer* pBuffer,
        Pal::gpusize offset,
        Pal::IndexType indexType);

    void PalCmdUnbindIndexData(Pal::IndexType indexType);

    void PalCmdDraw(
        uint32_t firstVertex,
        uint32_t vertexCount,
        uint32_t firstInstance,
        uint32_t instanceCount);

    void PalCmdDrawIndexed(
        uint32_t firstIndex,
        uint32_t indexCount,
        int32_t vertexOffset,
        uint32_t firstInstance,
        uint32_t instanceCount);

    void PalCmdDispatch(
        uint32_t x,
        uint32_t y,
        uint32_t z);

    void PalCmdDispatchOffset(
        uint32_t base_x,
        uint32_t base_y,
        uint32_t base_z,
        uint32_t size_x,
        uint32_t size_y,
        uint32_t size_z);

    void PalCmdDispatchIndirect(
        Buffer* pBuffer,
        Pal::gpusize offset);

    void PalCmdCopyBuffer(
        Buffer* pSrcBuffer,
        Buffer* pDstBuffer,
        uint32_t regionCount,
        Pal::MemoryCopyRegion* pRegions);

    void PalCmdCopyImage(
        const Image* const pSrcImage,
        Pal::ImageLayout srcImageLayout,
        const Image* const pDstImage,
        Pal::ImageLayout destImageLayout,
        uint32_t regionCount,
        Pal::ImageCopyRegion* pRegions);

    void PalCmdScaledCopyImage(
        const Image* const pSrcImage,
        const Image* const pDstImage,
        Pal::ScaledCopyInfo& copyInfo);

    void PalCmdCopyMemoryToImage(
        const Buffer* pSrcBuffer,
        const Image* pDstImage,
        Pal::ImageLayout layout,
        uint32_t regionCount,
        Pal::MemoryImageCopyRegion* pRegions);

    void PalCmdCopyImageToMemory(
        const Image* pSrcImage,
        const Buffer* pDstBuffer,
        Pal::ImageLayout layout,
        uint32_t regionCount,
        Pal::MemoryImageCopyRegion* pRegions);

    void PalCmdUpdateBuffer(
        Buffer* pDestBuffer,
        Pal::gpusize offset,
        Pal::gpusize size,
        const uint32_t* pData);

    void PalCmdFillBuffer(
        Buffer* pDestBuffer,
        Pal::gpusize offset,
        Pal::gpusize size,
        uint32_t data);

    void PalCmdClearColorImage(
        const Image& image,
        Pal::ImageLayout imageLayout,
        const Pal::ClearColor& color,
        Pal::uint32 rangeCount,
        const Pal::SubresRange* pRanges,
        Pal::uint32 boxCount,
        const Pal::Box* pBoxes,
        Pal::uint32 flags);

    void PalCmdClearDepthStencil(
        const Image& image,
        Pal::ImageLayout depthLayout,
        Pal::ImageLayout stencilLayout,
        float depth,
        Pal::uint8 stencil,
        Pal::uint32 rangeCount,
        const Pal::SubresRange* pRanges,
        Pal::uint32 rectCount,
        const Pal::Rect* pRects,
        Pal::uint32 flags);

    template <typename EventContainer_T>
    void PalCmdResetEvent(
        EventContainer_T* pEvent,
        Pal::HwPipePoint resetPoint);

    template <typename EventContainer_T>
    void PalCmdSetEvent(
        EventContainer_T* pEvent,
        Pal::HwPipePoint resetPoint);

    template< bool regionPerDevice >
    void PalCmdResolveImage(
        const Image& srcImage,
        Pal::ImageLayout srcImageLayout,
        const Image& dstImage,
        Pal::ImageLayout dstImageLayout,
        uint32_t regionCount,
        const Pal::ImageResolveRegion* pRegions);

    void PalCmdSetIndirectUserDataWatermark(
        uint16_t tableId,
        uint32_t dwordLimit);

    void PreBltBindMsaaState(const Image& image);

    void PostBltRestoreMsaaState();

    void PalCmdBindMsaaStates(const Pal::IMsaaState* const * pStates);

    // The three single-device bind helpers below are defined inline after the class. Each one skips the PAL call
    // when the requested state object is already recorded as bound for that device.
    VK_INLINE void PalCmdBindMsaaState(
        Pal::ICmdBuffer* pPalCmdBuf,
        uint32_t deviceIdx,
        const Pal::IMsaaState* pState);

    VK_INLINE void PalCmdBindColorBlendState(
        Pal::ICmdBuffer* pPalCmdBuf,
        uint32_t deviceIdx,
        const Pal::IColorBlendState* pState);

    VK_INLINE void PalCmdBindDepthStencilState(
        Pal::ICmdBuffer* pPalCmdBuf,
        uint32_t deviceIdx,
        const Pal::IDepthStencilState* pState);

    void PalCmdSetMsaaQuadSamplePattern(
        uint32_t numSamplesPerPixel,
        const Pal::MsaaQuadSamplePattern& quadSamplePattern);

    VK_INLINE void PalCmdBufferSetUserData(
        Pal::PipelineBindPoint bindPoint,
        uint32_t firstEntry,
        uint32_t entryCount,
        uint32_t perDeviceStride,
        const uint32_t* pEntryValues);

    template< typename EventContainer_T >
    VK_INLINE void InsertDeviceEvents(
        const Pal::IGpuEvent** pDestEvents,
        const EventContainer_T* pSrcEvents,
        uint32_t index,
        uint32_t stride) const;

    // Number of per-device event slots needed to hold numEvents events on every PAL device.
    VK_INLINE uint32_t NumDeviceEvents(uint32_t numEvents) const
    {
        return m_pDevice->NumPalDevices() * numEvents;
    }

    // ---- Debug barriers --------------------------------------------------------------------------------------------
    // When VK_ENABLE_DEBUG_BARRIERS is set, a debug barrier is issued before/after a command whose bit is present in
    // the corresponding mask; otherwise both hooks are no-ops.
#if VK_ENABLE_DEBUG_BARRIERS
    VK_INLINE void DbgBarrierPreCmd(uint32_t cmd)
    {
        if (m_dbgBarrierPreCmdMask & (cmd))
        {
            DbgCmdBarrier(true);
        }
    }

    VK_INLINE void DbgBarrierPostCmd(uint32_t cmd)
    {
        if (m_dbgBarrierPostCmdMask & (cmd))
        {
            DbgCmdBarrier(false);
        }
    }
#else
    VK_INLINE void DbgBarrierPreCmd(uint32_t cmd) {}
    VK_INLINE void DbgBarrierPostCmd(uint32_t cmd) {}
#endif

    SqttCmdBufferState* GetSqttState()
        { return m_pSqttState; }

    VK_INLINE static bool IsStaticStateDifferent(
        uint32_t oldToken,
        uint32_t newToken);

private:
    CmdBuffer(Device* pDevice, CmdPool* pCmdPool, uint32_t queueFamilyIndex);

    // Not copyable: PalCmdBuffer() derives the device-0 PAL command buffer from the address of this object, so a
    // copy placed anywhere else would hand out a pointer into unrelated memory.
    CmdBuffer(const CmdBuffer&) = delete;
    CmdBuffer& operator=(const CmdBuffer&) = delete;

    VkResult Initialize(
        void* pPalMem,
        void* pVbMem,
        const Pal::CmdBufferCreateInfo& createInfo);

    void ResetState();

    // ---- Barrier helpers -------------------------------------------------------------------------------------------
    void FlushBarriers(
        Pal::BarrierInfo* pBarrier,
        Pal::BarrierTransition* const pTransitions,
        const Image** pTransitionImages,
        uint32_t mainTransitionCount,
        uint32_t postTransitionStartIdx,
        uint32_t postTransitionCount);

    void ExecuteBarriers(
        VirtualStackFrame& virtStackFrame,
        uint32_t memBarrierCount,
        const VkMemoryBarrier* pMemoryBarriers,
        uint32_t bufferMemoryBarrierCount,
        const VkBufferMemoryBarrier* pBufferMemoryBarriers,
        uint32_t imageMemoryBarrierCount,
        const VkImageMemoryBarrier* pImageMemoryBarriers,
        Pal::BarrierInfo* pBarrier);

    void RebindCompatibleUserData(
        uint32_t bindPoint,
        const PipelineLayout* pNewLayout);

    void PalBindPipeline(
        VkPipelineBindPoint pipelineBindPoint,
        VkPipeline pipeline);

    void AlignMemoryImageCopyRegion(
        const Pal::IImage* pImage,
        Pal::MemoryImageCopyRegion* pRegion) const;

    template< typename Type_T >
    bool DetectCopyOverwrite(const Type_T* pDst) const;

    // ---- Render pass helpers ---------------------------------------------------------------------------------------
    VK_INLINE void RPBeginSubpass();
    VK_INLINE void RPEndSubpass();
    void RPResolveAttachments(uint32_t count, const RPResolveInfo* pResolves);
    void RPSyncPoint(const RPSyncPointInfo& syncPoint, VirtualStackFrame* pVirtStack);
    void RPLoadOpClearColor(uint32_t count, const RPLoadOpClearInfo* pClears);
    void RPLoadOpClearDepthStencil(uint32_t count, const RPLoadOpClearInfo* pClears);
    void RPBindTargets(const RPBindTargetsInfo& targets);
    void RPInitSamplePattern();
    void RPSetViewInstanceMask();

    VK_INLINE Pal::ImageLayout RPGetAttachmentLayout(uint32_t attachment, Pal::ImageAspect aspect);
    VK_INLINE void RPSetAttachmentLayout(uint32_t attachment, Pal::ImageAspect aspect, Pal::ImageLayout layout);

    void FillTimestampQueryPool(
        const TimestampQueryPool& timestampQueryPool,
        const uint32_t firstQuery,
        const uint32_t queryCount,
        const uint32_t timestampChunk);

#if VK_ENABLE_DEBUG_BARRIERS
    void DbgCmdBarrier(bool preCmd);
#endif

    // ---- Data members (declaration order is significant; do not reorder) --------------------------------------------
    Device* const m_pDevice;
    CmdPool* const m_pCmdPool;
    uint32_t m_queueFamilyIndex;
    Pal::QueueType m_palQueueType;
    Pal::EngineType m_palEngineType;
    uint32_t m_palDeviceMask;       // Devices that commands currently apply to; set through SetDeviceMask()
    uint32_t m_palDeviceUsedMask;   // SetDeviceMask() asserts that new masks are a subset of this mask
    Pal::ICmdBuffer* m_pPalCmdBuffers[MaxPalDevices];
    VirtualStackAllocator* m_pStackAllocator;

    GpuEventMgr* m_pGpuEventMgr;

    CmdBufferRenderState m_state; // Render state tracked during command buffer building
    VertBufBindingMgr m_vbMgr; // Manages current vertex buffer bindings
    StencilOpsCombiner m_stencilCombiner; // Manages internal stencil combined state
    bool m_is2ndLvl; // is this command buffer secondary or primary
    bool m_isRecording;
    bool m_needResetState;

    SqttCmdBufferState* m_pSqttState; // Per-cmdbuf state for handling SQ thread-tracing annotations

    RenderPassInstanceState m_renderPassInstance;

#if VK_ENABLE_DEBUG_BARRIERS
    uint32_t m_dbgBarrierPreCmdMask;
    uint32_t m_dbgBarrierPostCmdMask;
#endif
};
// =====================================================================================================================
// Decides whether a piece of static render state has to be reprogrammed. Returns true when the incoming token differs
// from the current one, and also whenever the current token is DynamicRenderStateToken, even if the two tokens are
// equal.
bool CmdBuffer::IsStaticStateDifferent(
    uint32_t currentToken,
    uint32_t newToken)
{
    const bool currentIsDynamic = (currentToken == DynamicRenderStateToken);
    const bool tokenChanged     = (currentToken != newToken);

    return (tokenChanged || currentIsDynamic);
}
// =====================================================================================================================
// Binds a PAL MSAA state object on one device's PAL command buffer. The PAL call is skipped when the same object is
// already recorded as bound for that device; otherwise the per-GPU tracking pointer is updated after the bind.
void CmdBuffer::PalCmdBindMsaaState(
    Pal::ICmdBuffer*       pPalCmdBuf,
    uint32_t               deviceIdx,
    const Pal::IMsaaState* pState)
{
    // The target device must be part of the current device mask.
    VK_ASSERT(((1UL << deviceIdx) & m_palDeviceMask) != 0);

    PerGpuRenderState& gpuState = m_state.perGpuState[deviceIdx];

    if (gpuState.pMsaaState == pState)
    {
        return;
    }

    pPalCmdBuf->CmdBindMsaaState(pState);
    gpuState.pMsaaState = pState;
}
// =====================================================================================================================
// Binds a PAL color blend state object on one device's PAL command buffer. The PAL call is skipped when the same
// object is already recorded as bound for that device; otherwise the per-GPU tracking pointer is updated after the
// bind.
void CmdBuffer::PalCmdBindColorBlendState(
    Pal::ICmdBuffer*             pPalCmdBuf,
    uint32_t                     deviceIdx,
    const Pal::IColorBlendState* pState)
{
    // The target device must be part of the current device mask.
    VK_ASSERT(((1UL << deviceIdx) & m_palDeviceMask) != 0);

    PerGpuRenderState& gpuState = m_state.perGpuState[deviceIdx];

    if (gpuState.pColorBlendState == pState)
    {
        return;
    }

    pPalCmdBuf->CmdBindColorBlendState(pState);
    gpuState.pColorBlendState = pState;
}
// =====================================================================================================================
// Binds a PAL depth-stencil state object on one device's PAL command buffer. The PAL call is skipped when the same
// object is already recorded as bound for that device; otherwise the per-GPU tracking pointer is updated after the
// bind.
void CmdBuffer::PalCmdBindDepthStencilState(
    Pal::ICmdBuffer*               pPalCmdBuf,
    uint32_t                       deviceIdx,
    const Pal::IDepthStencilState* pState)
{
    // The target device must be part of the current device mask.
    VK_ASSERT(((1UL << deviceIdx) & m_palDeviceMask) != 0);

    PerGpuRenderState& gpuState = m_state.perGpuState[deviceIdx];

    if (gpuState.pDepthStencilState == pState)
    {
        return;
    }

    pPalCmdBuf->CmdBindDepthStencilState(pState);
    gpuState.pDepthStencilState = pState;
}
// =====================================================================================================================
// Writes a range of user data entries on the PAL command buffer of every PAL device. Device N reads its values
// starting at pEntryValues + N * perDeviceStride, so a stride of zero gives every device the same values.
void CmdBuffer::PalCmdBufferSetUserData(
    Pal::PipelineBindPoint bindPoint,
    uint32_t               firstEntry,
    uint32_t               entryCount,
    uint32_t               perDeviceStride,
    const uint32_t*        pEntryValues)
{
    for (uint32_t gpu = 0; gpu < m_pDevice->NumPalDevices(); gpu++)
    {
        // Start of this device's slice of the caller-provided values
        const uint32_t* const pGpuValues = pEntryValues + (gpu * perDeviceStride);

        PalCmdBuffer(gpu)->CmdSetUserData(bindPoint, firstEntry, entryCount, pGpuValues);
    }
}
// =====================================================================================================================
// Copies the per-device PAL events of one event container into a destination array. The event for device N is stored
// at slot (N * stride) + index.
template< typename EventContainer_T >
void CmdBuffer::InsertDeviceEvents(
    const Pal::IGpuEvent**  pDestEvents,
    const EventContainer_T* pSrcEvents,
    uint32_t                index,
    uint32_t                stride
    ) const
{
    uint32_t gpu = 0;

    while (gpu < m_pDevice->NumPalDevices())
    {
        const uint32_t slot = (gpu * stride) + index;

        pDestEvents[slot] = pSrcEvents->PalEvent(gpu);

        gpu++;
    }
}
// =====================================================================================================================
// Returns the PAL layout currently tracked for one aspect of a render pass attachment. The aspect must be color,
// depth or stencil and the attachment index must be valid for both the current render pass and the render pass
// instance state; these preconditions are checked by assertion only.
Pal::ImageLayout CmdBuffer::RPGetAttachmentLayout(
    uint32_t         attachment,
    Pal::ImageAspect aspect)
{
    const size_t aspectIdx = static_cast<size_t>(aspect);

    VK_ASSERT(aspect == Pal::ImageAspect::Color ||
              aspect == Pal::ImageAspect::Depth ||
              aspect == Pal::ImageAspect::Stencil);
    VK_ASSERT(aspectIdx < 3);
    VK_ASSERT(attachment < m_state.allGpuState.pRenderPass->GetAttachmentCount());
    VK_ASSERT(attachment < m_renderPassInstance.maxAttachmentCount);

    const RenderPassInstanceState::AttachmentState& attachmentState = m_renderPassInstance.pAttachments[attachment];

    return attachmentState.aspectLayout[aspectIdx];
}
// =====================================================================================================================
void CmdBuffer::RPSetAttachmentLayout(
uint32_t attachment,
Pal::ImageAspect aspect,
Pal::ImageLayout layout)
{
VK_ASSERT(aspect == Pal::ImageAspect::Color ||
aspect == Pal::ImageAspect::Depth ||
aspect == Pal::ImageAspect::Stencil);
VK_ASSERT(static_cast<size_t>(aspect) < 3);
VK_ASSERT(attachment < m_state.allGpuState.pRenderPass->GetAttachmentCount());
VK_ASSERT(attachment < m_renderPassInstance.maxAttachmentCount);
m_renderPassInstance.pAttachments[attachment].aspectLayout[static_cast<size_t>(aspect)] = layout;
}
// NOTE(review): presumably generates the DispatchableCmdBuffer wrapper type that is forward-declared earlier in this
// namespace -- confirm against the macro definition (likely in vk_dispatch.h).
VK_DEFINE_DISPATCHABLE(CmdBuffer);
namespace entry