Skip to content

Commit

Permalink
GS:MTL: CAS support
Browse files Browse the repository at this point in the history
  • Loading branch information
TellowKrinkle authored and refractionpcsx2 committed Nov 23, 2022
1 parent f7c79fe commit 2fb8ecb
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 8 deletions.
1 change: 1 addition & 0 deletions pcsx2/CMakeLists.txt
Expand Up @@ -794,6 +794,7 @@ if(USE_VULKAN)
endif()

set(pcsx2GSMetalShaders
GS/Renderers/Metal/cas.metal
GS/Renderers/Metal/convert.metal
GS/Renderers/Metal/present.metal
GS/Renderers/Metal/merge.metal
Expand Down
2 changes: 2 additions & 0 deletions pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
Expand Up @@ -237,6 +237,7 @@ class GSDeviceMTL final : public GSDevice
MRCOwned<id<MTLFence>> m_spin_fence;

// Functions and Pipeline States
MRCOwned<id<MTLComputePipelineState>> m_cas_pipeline[2];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline[static_cast<int>(ShaderConvert::Count)];
MRCOwned<id<MTLRenderPipelineState>> m_present_pipeline[static_cast<int>(PresentShader::Count)];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline_copy[2];
Expand Down Expand Up @@ -359,6 +360,7 @@ class GSDeviceMTL final : public GSDevice

MRCOwned<id<MTLFunction>> LoadShader(NSString* name);
MRCOwned<id<MTLRenderPipelineState>> MakePipeline(MTLRenderPipelineDescriptor* desc, id<MTLFunction> vertex, id<MTLFunction> fragment, NSString* name);
MRCOwned<id<MTLComputePipelineState>> MakeComputePipeline(id<MTLFunction> compute, NSString* name);
bool Create() override;

void ClearRenderTarget(GSTexture* t, const GSVector4& c) override;
Expand Down
56 changes: 48 additions & 8 deletions pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
Expand Up @@ -503,6 +503,9 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
else
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget];
break;
case GSTexture::Type::RWTexture:
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
break;
default:
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget];
}
Expand Down Expand Up @@ -625,9 +628,24 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
#endif

bool GSDeviceMTL::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
{
return false;
}
{ @autoreleasepool {
static constexpr int threadGroupWorkRegionDim = 16;
const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
static_assert(sizeof(constants) == sizeof(GSMTLCASPSUniform));

EndRenderPass();
id<MTLComputeCommandEncoder> enc = [GetRenderCmdBuf() computeCommandEncoder];
[enc setLabel:@"CAS"];
[enc setComputePipelineState:m_cas_pipeline[sharpen_only]];
[enc setTexture:static_cast<GSTextureMTL*>(sTex)->GetTexture() atIndex:0];
[enc setTexture:static_cast<GSTextureMTL*>(dTex)->GetTexture() atIndex:1];
[enc setBytes:&constants length:sizeof(constants) atIndex:GSMTLBufferIndexUniforms];
[enc dispatchThreadgroups:MTLSizeMake(dispatchX, dispatchY, 1)
threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
[enc endEncoding];
return true;
}}

MRCOwned<id<MTLFunction>> GSDeviceMTL::LoadShader(NSString* name)
{
Expand Down Expand Up @@ -658,6 +676,26 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
return res;
}

MRCOwned<id<MTLComputePipelineState>> GSDeviceMTL::MakeComputePipeline(id<MTLFunction> compute, NSString* name)
{
MRCOwned<MTLComputePipelineDescriptor*> desc = MRCTransfer([MTLComputePipelineDescriptor new]);
[desc setLabel:name];
[desc setComputeFunction:compute];
NSError* err;
MRCOwned<id<MTLComputePipelineState>> res = MRCTransfer([m_dev.dev
newComputePipelineStateWithDescriptor:desc
options:0
reflection:nil
error:&err]);
if (unlikely(err))
{
NSString* msg = [NSString stringWithFormat:@"Failed to create pipeline %@: %@", name, [err localizedDescription]];
Console.Error("%s", [msg UTF8String]);
throw GSRecoverableError();
}
return res;
}

static void applyAttribute(MTLVertexDescriptor* desc, NSUInteger idx, MTLVertexFormat fmt, NSUInteger offset, NSUInteger buffer_index)
{
MTLVertexAttributeDescriptor* attrs = desc.attributes[idx];
Expand Down Expand Up @@ -704,6 +742,7 @@ static void setFnConstantI(MTLFunctionConstantValues* fc, unsigned int value, GS
m_features.framebuffer_fetch = m_dev.features.framebuffer_fetch;
m_features.dual_source_blend = true;
m_features.stencil_buffer = true;
m_features.cas_sharpening = true;

try
{
Expand All @@ -725,12 +764,13 @@ static void setFnConstantI(MTLFunctionConstantValues* fc, unsigned int value, GS
[clearSpinBuffer fillBuffer:m_spin_buffer range:NSMakeRange(0, 4) value:0];
[clearSpinBuffer updateFence:m_spin_fence];
[clearSpinBuffer endEncoding];
NSError* err = nullptr;
m_spin_pipeline = MRCTransfer([m_dev.dev newComputePipelineStateWithFunction:LoadShader(@"waste_time") error:&err]);
if (err)
m_spin_pipeline = MakeComputePipeline(LoadShader(@"waste_time"), @"waste_time");

for (int sharpen_only = 0; sharpen_only < 2; sharpen_only++)
{
Console.Error("Failed to create spin pipeline: %s", [[err localizedDescription] UTF8String]);
return false;
setFnConstantB(m_fn_constants, sharpen_only, GSMTLConstantIndex_CAS_SHARPEN_ONLY);
NSString* shader = m_dev.features.has_fast_half ? @"CASHalf" : @"CASFloat";
m_cas_pipeline[sharpen_only] = MakeComputePipeline(LoadShader(shader), sharpen_only ? @"CAS Sharpen" : @"CAS Upscale");
}

m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]);
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h
Expand Up @@ -42,6 +42,7 @@ struct GSMTLDevice
bool framebuffer_fetch;
bool primid;
bool slow_color_compression; ///< Color compression seems to slow down rt read on AMD
bool has_fast_half;
MetalVersion shader_version;
int max_texsize;
};
Expand Down
4 changes: 4 additions & 0 deletions pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm
Expand Up @@ -152,6 +152,10 @@ static DetectionResult detectIntelGPU(id<MTLDevice> dev, id<MTLLibrary> lib)
if ([dev supportsFamily:MTLGPUFamilyApple1])
features.framebuffer_fetch = true;

if (@available(macOS 10.15, iOS 13.0, *))
if ([dev supportsFamily:MTLGPUFamilyMac2] || [dev supportsFamily:MTLGPUFamilyApple1])
features.has_fast_half = true; // Approximate guess

features.shader_version = detectLibraryVersion(shaders);
if (features.framebuffer_fetch && features.shader_version < MetalVersion::Metal23)
{
Expand Down
8 changes: 8 additions & 0 deletions pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
Expand Up @@ -57,6 +57,13 @@ struct GSMTLInterlacePSUniform
vector_float4 ZrH;
};

struct GSMTLCASPSUniform
{
vector_uint4 const0;
vector_uint4 const1;
vector_int2 srcOffset;
};

struct GSMTLMainVertex
{
vector_float2 st;
Expand Down Expand Up @@ -132,6 +139,7 @@ enum class GSMTLExpandType : unsigned char

enum GSMTLFnConstants
{
GSMTLConstantIndex_CAS_SHARPEN_ONLY,
GSMTLConstantIndex_SCALING_FACTOR,
GSMTLConstantIndex_FRAMEBUFFER_FETCH,
GSMTLConstantIndex_FST,
Expand Down
112 changes: 112 additions & 0 deletions pcsx2/GS/Renderers/Metal/cas.metal
@@ -0,0 +1,112 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/

#define A_GPU 1
#define A_MSL 1
#define A_HALF 1

#include "../../../../bin/resources/shaders/common/ffx_a.h"

struct CASTextureF
{
const thread texture2d<float, access::read>& tex;
uint2 offset;
};

struct CASTextureH
{
const thread texture2d<half, access::read>& tex;
ushort2 offset;
};

#define CAS_TEXTURE CASTextureF
#define CAS_TEXTUREH CASTextureH

A_STATIC AF3 CasLoad(CASTextureF tex, ASU2 coord)
{
return tex.tex.read(AU2(coord) + tex.offset).rgb;
}
#define CasInput(r,g,b)

A_STATIC AH3 CasLoadH(CASTextureH tex, ASW2 coord)
{
return tex.tex.read(AW2(coord) + tex.offset).rgb;
}

A_STATIC void CasInputH(inoutAH2 r, inoutAH2 g, inoutAH2 b){}

#include "../../../../bin/resources/shaders/common/ffx_cas.h"

#include "GSMTLShaderCommon.h"

constant bool CAS_SHARPEN_ONLY [[function_constant(GSMTLConstantIndex_CAS_SHARPEN_ONLY)]];

kernel void CASFloat(
uint2 localID [[thread_position_in_threadgroup]],
uint2 workgroupID [[threadgroup_position_in_grid]],
texture2d<float, access::read> input [[texture(0)]],
texture2d<float, access::write> output [[texture(1)]],
constant GSMTLCASPSUniform& cb [[buffer(GSMTLBufferIndexUniforms)]])
{
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
AU2 gxy = ARmp8x8(localID.x) + (workgroupID << 4);
const AU4 const0 = cb.const0;
const AU4 const1 = cb.const1;
const CASTextureF tex{input, AU2(cb.srcOffset)};

// Filter.
float r, g, b;

CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(float4(r, g, b, 1), gxy);
gxy.x += 8;

CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(float4(r, g, b, 1), gxy);
gxy.y += 8;

CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(float4(r, g, b, 1), gxy);
gxy.x -= 8;

CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(float4(r, g, b, 1), gxy);
}

kernel void CASHalf(
uint2 localID [[thread_position_in_threadgroup]],
uint2 workgroupID [[threadgroup_position_in_grid]],
texture2d<half, access::read> input [[texture(0)]],
texture2d<half, access::write> output [[texture(1)]],
constant GSMTLCASPSUniform& cb [[buffer(GSMTLBufferIndexUniforms)]])
{
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
AU2 gxy = ARmp8x8(localID.x) + (workgroupID << 4);
const AU4 const0 = cb.const0;
const AU4 const1 = cb.const1;
const CASTextureH tex{input, AW2(cb.srcOffset)};

// Filter.
half2 r, g, b;

#pragma unroll
for (int i = 0; i < 2; i++)
{
CasFilterH(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(half4(r.x, g.x, b.x, 1), gxy);
output.write(half4(r.y, g.y, b.y, 1), gxy + AU2(8, 0));
gxy.y += 8;
}
}

0 comments on commit 2fb8ecb

Please sign in to comment.