Skip to content

Commit

Permalink
vk/11/12: bind VBO only once per frame to save on draw calls.
Browse files Browse the repository at this point in the history
  • Loading branch information
chyyran committed Feb 6, 2023
1 parent d4525ee commit 3db89e5
Show file tree
Hide file tree
Showing 14 changed files with 93 additions and 100 deletions.
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions librashader-runtime-d3d11/Cargo.toml
Expand Up @@ -23,6 +23,7 @@ spirv_cross = { package = "librashader-spirv-cross", version = "0.23" }
rustc-hash = "1.1.0"
bytemuck = "1.12.3"
rayon = "1.6.1"
array-concat = "0.5.2"

[target.'cfg(windows)'.dependencies.windows]
version = "0.44.0"
Expand Down
@@ -1,3 +1,4 @@
use array_concat::concat_arrays;
use crate::error;
use crate::error::assume_d3d11_init;
use bytemuck::offset_of;
Expand All @@ -21,7 +22,7 @@ struct D3D11Vertex {

const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0];

static OFFSCREEN_VBO_DATA: &[D3D11Vertex; 4] = &[
const OFFSCREEN_VBO_DATA: [D3D11Vertex; 4] = [
D3D11Vertex {
position: [-1.0, -1.0],
texcoord: [0.0, 1.0],
Expand All @@ -44,7 +45,7 @@ static OFFSCREEN_VBO_DATA: &[D3D11Vertex; 4] = &[
},
];

static FINAL_VBO_DATA: &[D3D11Vertex; 4] = &[
const FINAL_VBO_DATA: [D3D11Vertex; 4] = [
D3D11Vertex {
position: [0.0, 0.0],
texcoord: [0.0, 1.0],
Expand All @@ -67,84 +68,70 @@ static FINAL_VBO_DATA: &[D3D11Vertex; 4] = &[
},
];

// Vertex data for the single shared vertex buffer: the four offscreen-quad
// vertices followed by the four final-quad vertices, joined at compile time.
// `draw_quad` selects a quad by start vertex (0 = offscreen, 4 = final).
static VBO_DATA: &[D3D11Vertex; 8] = &concat_arrays!(OFFSCREEN_VBO_DATA, FINAL_VBO_DATA);

pub(crate) struct DrawQuad {
final_vbo: ID3D11Buffer,
context: ID3D11DeviceContext,
offset: u32,
stride: u32,
offscreen_vbo: ID3D11Buffer,
vbo: ID3D11Buffer,
}

impl DrawQuad {
pub fn new(device: &ID3D11Device, context: &ID3D11DeviceContext) -> error::Result<DrawQuad> {
unsafe {
let mut final_vbo = None;
device.CreateBuffer(
&D3D11_BUFFER_DESC {
ByteWidth: std::mem::size_of::<[D3D11Vertex; 4]>() as u32,
Usage: D3D11_USAGE_IMMUTABLE,
BindFlags: D3D11_BIND_VERTEX_BUFFER,
CPUAccessFlags: Default::default(),
MiscFlags: Default::default(),
StructureByteStride: 0,
},
Some(&D3D11_SUBRESOURCE_DATA {
pSysMem: FINAL_VBO_DATA.as_ptr().cast(),
SysMemPitch: 0,
SysMemSlicePitch: 0,
}),
Some(&mut final_vbo),
)?;
assume_d3d11_init!(final_vbo, "CreateBuffer");

let mut offscreen_vbo = None;
let mut vbo = None;
device.CreateBuffer(
&D3D11_BUFFER_DESC {
ByteWidth: std::mem::size_of::<[D3D11Vertex; 4]>() as u32,
ByteWidth: 2 * std::mem::size_of::<[D3D11Vertex; 4]>() as u32,
Usage: D3D11_USAGE_IMMUTABLE,
BindFlags: D3D11_BIND_VERTEX_BUFFER,
CPUAccessFlags: Default::default(),
MiscFlags: Default::default(),
StructureByteStride: 0,
},
Some(&D3D11_SUBRESOURCE_DATA {
pSysMem: OFFSCREEN_VBO_DATA.as_ptr().cast(),
pSysMem: VBO_DATA.as_ptr().cast(),
SysMemPitch: 0,
SysMemSlicePitch: 0,
}),
Some(&mut offscreen_vbo),
Some(&mut vbo),
)?;
assume_d3d11_init!(offscreen_vbo, "CreateBuffer");
assume_d3d11_init!(vbo, "CreateBuffer");

Ok(DrawQuad {
final_vbo,
offscreen_vbo,
vbo,
context: context.clone(),
offset: 0,
stride: std::mem::size_of::<D3D11Vertex>() as u32,
})
}
}

pub fn bind_vertices(&self, vbo_type: QuadType) {
pub fn bind_vbo_for_frame(&self) {
unsafe {
self.context
.IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
let buffer = match vbo_type {
QuadType::Offscreen => &self.offscreen_vbo,
QuadType::Final => &self.final_vbo,
};

self.context.IASetVertexBuffers(
0,
1,
Some(&Some(buffer.clone())),
Some(&Some(self.vbo.clone())),
Some(&self.stride),
Some(&self.offset),
Some(&0),
);
}
}

/// Issues the draw call for one quad stored in the shared vertex buffer.
///
/// `vbo_type` selects the first vertex to draw from: the offscreen quad
/// starts at vertex 0 and the final quad at vertex 4. Four vertices are
/// always drawn on the supplied `context`.
///
/// NOTE(review): this does not bind anything itself; it presumably relies on
/// `bind_vbo_for_frame` having set the vertex buffer and triangle-strip
/// topology earlier in the frame — confirm at the call sites.
pub fn draw_quad(&self, context: &ID3D11DeviceContext, vbo_type: QuadType) {
    // Each quad occupies four consecutive vertices in the buffer.
    const VERTICES_PER_QUAD: u32 = 4;

    let first_vertex: u32 = match vbo_type {
        QuadType::Offscreen => 0,
        QuadType::Final => VERTICES_PER_QUAD,
    };

    // SAFETY: `context` is a live, borrowed device-context interface; the
    // call only reads the two integer arguments.
    unsafe {
        context.Draw(VERTICES_PER_QUAD, first_vertex);
    }
}

pub fn get_spirv_cross_vbo_desc() -> [D3D11_INPUT_ELEMENT_DESC; 2] {
[
D3D11_INPUT_ELEMENT_DESC {
Expand Down
4 changes: 3 additions & 1 deletion librashader-runtime-d3d11/src/filter_chain.rs
Expand Up @@ -17,7 +17,7 @@ use crate::error::{assume_d3d11_init, FilterChainError};
use crate::filter_pass::{ConstantBufferBinding, FilterPass};
use crate::framebuffer::OwnedFramebuffer;
use crate::options::{FilterChainOptionsD3D11, FrameOptionsD3D11};
use crate::quad_render::DrawQuad;
use crate::draw_quad::DrawQuad;
use crate::render_target::RenderTarget;
use crate::samplers::SamplerSet;
use crate::util::d3d11_compile_bound_shader;
Expand Down Expand Up @@ -472,6 +472,8 @@ impl FilterChainD3D11 {
let passes_len = passes.len();
let (pass, last) = passes.split_at_mut(passes_len - 1);

self.common.draw_quad.bind_vbo_for_frame();

for (index, pass) in pass.iter_mut().enumerate() {
source.filter = pass.config.filter;
source.wrap_mode = pass.config.wrap_mode;
Expand Down
6 changes: 1 addition & 5 deletions librashader-runtime-d3d11/src/filter_pass.rs
Expand Up @@ -159,7 +159,6 @@ impl FilterPass {
}
}
unsafe {
parent.draw_quad.bind_vertices(vbo_type);
context.IASetInputLayout(&self.vertex_layout);
context.VSSetShader(&self.vertex_shader, None);
context.PSSetShader(&self.pixel_shader, None);
Expand Down Expand Up @@ -243,10 +242,7 @@ impl FilterPass {
context.RSSetViewports(Some(&[output.output.viewport]))
}

unsafe {
// must be under primitive topology trianglestrip with quad
context.Draw(4, 0);
}
parent.draw_quad.draw_quad(context, vbo_type);

unsafe {
// unbind resources.
Expand Down
5 changes: 3 additions & 2 deletions librashader-runtime-d3d11/src/lib.rs
Expand Up @@ -15,7 +15,7 @@ mod filter_pass;
mod framebuffer;
pub mod options;
mod parameters;
mod quad_render;
mod draw_quad;
mod render_target;
mod samplers;
mod texture;
Expand All @@ -37,8 +37,9 @@ mod tests {
// "../test/slang-shaders/presets/crt-geom-ntsc-upscale-sharp.slangp",
// "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp",
// "../test/null.slangp",
const FILTER_PATH: &str = "../test/slang-shaders/scalefx/scalefx-9x.slangp";

const FILTER_PATH: &str = "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp";
// const FILTER_PATH: &str = "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp";
const IMAGE_PATH: &str = "../test/finalfightlong.png";
#[test]
fn triangle_d3d11_args() {
Expand Down
1 change: 1 addition & 0 deletions librashader-runtime-d3d12/Cargo.toml
Expand Up @@ -25,6 +25,7 @@ bytemuck = { version = "1.12.3", features = ["derive"] }
array-init = "2.1.0"
bitvec = "1.0.1"
widestring = "1.0.2"
array-concat = "0.5.2"

rayon = "1.6.1"

Expand Down
3 changes: 3 additions & 0 deletions librashader-runtime-d3d12/src/filter_chain.rs
Expand Up @@ -604,6 +604,9 @@ impl FilterChainD3D12 {
cmd.SetGraphicsRootSignature(&self.common.root_signature.handle);
self.common.mipmap_gen.pin_root_signature(cmd);
}

self.common.draw_quad.bind_vertices_for_frame(cmd);

for (index, pass) in pass.iter_mut().enumerate() {
source.filter = pass.config.filter;
source.wrap_mode = pass.config.wrap_mode;
Expand Down
4 changes: 1 addition & 3 deletions librashader-runtime-d3d12/src/filter_pass.rs
Expand Up @@ -142,7 +142,6 @@ impl FilterPass {
output: &RenderTarget,
vbo_type: QuadType,
) -> error::Result<()> {
parent.draw_quad.bind_vertices(cmd, vbo_type);
unsafe {
cmd.SetPipelineState(&self.pipeline.handle);
}
Expand Down Expand Up @@ -212,8 +211,7 @@ impl FilterPass {
bottom: output.output.size.height as i32,
}]);

// todo put this in drawquad
cmd.DrawInstanced(4, 1, 0, 0)
parent.draw_quad.draw_quad(&cmd, vbo_type)
}

unsafe { cmd.EndRenderPass() }
Expand Down
67 changes: 28 additions & 39 deletions librashader-runtime-d3d12/src/quad_render.rs
@@ -1,13 +1,11 @@
use array_concat::concat_arrays;
use crate::buffer::D3D12Buffer;
use crate::error;
use bytemuck::{offset_of, Pod, Zeroable};
use librashader_runtime::quad::QuadType;
use windows::core::PCSTR;
use windows::Win32::Graphics::Direct3D::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
use windows::Win32::Graphics::Direct3D12::{
ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource,
D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, D3D12_INPUT_ELEMENT_DESC, D3D12_VERTEX_BUFFER_VIEW,
};
use windows::Win32::Graphics::Direct3D12::{ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, D3D12_INPUT_ELEMENT_DESC, D3D12_VERTEX_BUFFER_VIEW, ID3D12GraphicsCommandList4};
use windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT_R32G32_FLOAT;

#[repr(C)]
Expand All @@ -20,7 +18,7 @@ struct D3D12Vertex {

const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0];

static OFFSCREEN_VBO_DATA: &[D3D12Vertex; 4] = &[
const OFFSCREEN_VBO_DATA: [D3D12Vertex; 4] = [
D3D12Vertex {
position: [-1.0, -1.0],
texcoord: [0.0, 1.0],
Expand All @@ -43,7 +41,7 @@ static OFFSCREEN_VBO_DATA: &[D3D12Vertex; 4] = &[
},
];

static FINAL_VBO_DATA: &[D3D12Vertex; 4] = &[
const FINAL_VBO_DATA: [D3D12Vertex; 4] = [
D3D12Vertex {
position: [0.0, 0.0],
texcoord: [0.0, 1.0],
Expand All @@ -66,58 +64,49 @@ static FINAL_VBO_DATA: &[D3D12Vertex; 4] = &[
},
];

// Vertex data for the single shared vertex buffer: the four offscreen-quad
// vertices followed by the four final-quad vertices, joined at compile time.
// `draw_quad` selects a quad by start vertex (0 = offscreen, 4 = final).
static VBO_DATA: &[D3D12Vertex; 8] = &concat_arrays!(OFFSCREEN_VBO_DATA, FINAL_VBO_DATA);

pub(crate) struct DrawQuad {
offscreen_buffer: ID3D12Resource,
offscreen_view: D3D12_VERTEX_BUFFER_VIEW,
final_buffer: ID3D12Resource,
final_view: D3D12_VERTEX_BUFFER_VIEW,
buffer: ID3D12Resource,
view: D3D12_VERTEX_BUFFER_VIEW,
}

impl DrawQuad {
pub fn new(device: &ID3D12Device) -> error::Result<DrawQuad> {
let stride = std::mem::size_of::<D3D12Vertex>() as u32;
let size = std::mem::size_of::<[D3D12Vertex; 4]>() as u32;
let mut offscreen_buffer = D3D12Buffer::new(device, size as usize)?;
offscreen_buffer
.map(None)?
.slice
.copy_from_slice(bytemuck::cast_slice(OFFSCREEN_VBO_DATA));

let offscreen_view = D3D12_VERTEX_BUFFER_VIEW {
BufferLocation: offscreen_buffer.gpu_address(),
SizeInBytes: size,
StrideInBytes: stride,
};

let offscreen_buffer = offscreen_buffer.into_raw();

let mut final_buffer = D3D12Buffer::new(device, size as usize)?;
final_buffer
let size = 2 * std::mem::size_of::<[D3D12Vertex; 4]>() as u32;
let mut buffer = D3D12Buffer::new(device, size as usize)?;
buffer
.map(None)?
.slice
.copy_from_slice(bytemuck::cast_slice(FINAL_VBO_DATA));
.copy_from_slice(bytemuck::cast_slice(VBO_DATA));

let final_view = D3D12_VERTEX_BUFFER_VIEW {
BufferLocation: final_buffer.gpu_address(),
let view = D3D12_VERTEX_BUFFER_VIEW {
BufferLocation: buffer.gpu_address(),
SizeInBytes: size,
StrideInBytes: stride,
};

let final_buffer = final_buffer.into_raw();

Ok(DrawQuad { offscreen_buffer, offscreen_view, final_buffer, final_view })
let buffer = buffer.into_raw();
Ok(DrawQuad { buffer, view })
}

pub fn bind_vertices(&self, cmd: &ID3D12GraphicsCommandList, vbo_type: QuadType) {
/// Records the input-assembler state shared by every quad draw on `cmd`.
///
/// Sets the primitive topology to a triangle strip and binds this quad's
/// vertex buffer view to input slot 0.
pub fn bind_vertices_for_frame(&self, cmd: &ID3D12GraphicsCommandList) {
    // One view spans the whole buffer; it is copied into a one-element array
    // because the API takes a slice of views.
    let views = [self.view];

    // SAFETY: `cmd` is a live, borrowed command-list interface and `views`
    // outlives both calls.
    unsafe {
        cmd.IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
        cmd.IASetVertexBuffers(0, Some(&views));
    }
}

let view = match vbo_type {
QuadType::Offscreen => [self.offscreen_view],
QuadType::Final => [self.final_view],
};
// `frame` already records through ID3D12GraphicsCommandList4 (needed for render passes), so take that interface directly instead of casting.
pub fn draw_quad(&self, cmd: &ID3D12GraphicsCommandList4, vbo_type: QuadType) {
let offset = match vbo_type {
QuadType::Offscreen => 0,
QuadType::Final => 4,
};

cmd.IASetVertexBuffers(0, Some(&view));
unsafe {
cmd.DrawInstanced(4, 1, offset, 0)
}
}

Expand Down

0 comments on commit 3db89e5

Please sign in to comment.