diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt
index 3a83fe5d0af3e..1d28e53c57e4b 100644
--- a/pcsx2/CMakeLists.txt
+++ b/pcsx2/CMakeLists.txt
@@ -794,6 +794,7 @@ if(USE_VULKAN)
 endif()
 
 set(pcsx2GSMetalShaders
+	GS/Renderers/Metal/cas.metal
 	GS/Renderers/Metal/convert.metal
 	GS/Renderers/Metal/present.metal
 	GS/Renderers/Metal/merge.metal
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
index 729c70893bbc6..7e3dd775e869a 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
@@ -237,6 +237,7 @@ class GSDeviceMTL final : public GSDevice
 	MRCOwned<id<MTLFence>> m_spin_fence;
 
 	// Functions and Pipeline States
+	MRCOwned<id<MTLComputePipelineState>> m_cas_pipeline[2];
 	MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline[static_cast<int>(ShaderConvert::Count)];
 	MRCOwned<id<MTLRenderPipelineState>> m_present_pipeline[static_cast<int>(PresentShader::Count)];
 	MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline_copy[2];
@@ -359,6 +360,7 @@ class GSDeviceMTL final : public GSDevice
 
 	MRCOwned<id<MTLFunction>> LoadShader(NSString* name);
 	MRCOwned<id<MTLRenderPipelineState>> MakePipeline(MTLRenderPipelineDescriptor* desc, id<MTLFunction> vertex, id<MTLFunction> fragment, NSString* name);
+	MRCOwned<id<MTLComputePipelineState>> MakeComputePipeline(id<MTLFunction> compute, NSString* name);
 	bool Create() override;
 
 	void ClearRenderTarget(GSTexture* t, const GSVector4& c) override;
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
index 60db4d39761b8..5a412cf06facc 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
@@ -503,6 +503,9 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
 			else
 				[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget];
 			break;
+		case GSTexture::Type::RWTexture:
+			[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
+			break;
 		default:
 			[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget];
 	}
@@ -625,9 +628,24 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
 #endif
 
 bool GSDeviceMTL::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
-{
-	return false;
-}
+{ @autoreleasepool {
+	static constexpr int threadGroupWorkRegionDim = 16;
+	const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+	const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+	static_assert(sizeof(constants) == sizeof(GSMTLCASPSUniform));
+
+	EndRenderPass();
+	id<MTLComputeCommandEncoder> enc = [GetRenderCmdBuf() computeCommandEncoder];
+	[enc setLabel:@"CAS"];
+	[enc setComputePipelineState:m_cas_pipeline[sharpen_only]];
+	[enc setTexture:static_cast<GSTextureMTL*>(sTex)->GetTexture() atIndex:0];
+	[enc setTexture:static_cast<GSTextureMTL*>(dTex)->GetTexture() atIndex:1];
+	[enc setBytes:&constants length:sizeof(constants) atIndex:GSMTLBufferIndexUniforms];
+	[enc dispatchThreadgroups:MTLSizeMake(dispatchX, dispatchY, 1)
+	    threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
+	[enc endEncoding];
+	return true;
+}}
 
 MRCOwned<id<MTLFunction>> GSDeviceMTL::LoadShader(NSString* name)
 {
@@ -658,6 +676,26 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
 	return res;
 }
 
+MRCOwned<id<MTLComputePipelineState>> GSDeviceMTL::MakeComputePipeline(id<MTLFunction> compute, NSString* name)
+{
+	MRCOwned<MTLComputePipelineDescriptor*> desc = MRCTransfer([MTLComputePipelineDescriptor new]);
+	[desc setLabel:name];
+	[desc setComputeFunction:compute];
+	NSError* err;
+	MRCOwned<id<MTLComputePipelineState>> res = MRCTransfer([m_dev.dev
+		newComputePipelineStateWithDescriptor:desc
+		                              options:0
+		                           reflection:nil
+		                                error:&err]);
+	if (unlikely(err))
+	{
+		NSString* msg = [NSString stringWithFormat:@"Failed to create pipeline %@: %@", name, [err localizedDescription]];
+		Console.Error("%s", [msg UTF8String]);
+		throw GSRecoverableError();
+	}
+	return res;
+}
+
 static void applyAttribute(MTLVertexDescriptor* desc, NSUInteger idx, MTLVertexFormat fmt, NSUInteger offset, NSUInteger buffer_index)
 {
 	MTLVertexAttributeDescriptor* attrs = desc.attributes[idx];
@@ -704,6 +742,7 @@ static void setFnConstantI(MTLFunctionConstantValues* fc, unsigned int value, GS
 	m_features.framebuffer_fetch = m_dev.features.framebuffer_fetch;
 	m_features.dual_source_blend = true;
 	m_features.stencil_buffer = true;
+	m_features.cas_sharpening = true;
 
 	try
 	{
@@ -725,12 +764,13 @@ static void setFnConstantI(MTLFunctionConstantValues* fc, unsigned int value, GS
 		[clearSpinBuffer fillBuffer:m_spin_buffer range:NSMakeRange(0, 4) value:0];
 		[clearSpinBuffer updateFence:m_spin_fence];
 		[clearSpinBuffer endEncoding];
-		NSError* err = nullptr;
-		m_spin_pipeline = MRCTransfer([m_dev.dev newComputePipelineStateWithFunction:LoadShader(@"waste_time") error:&err]);
-		if (err)
+		m_spin_pipeline = MakeComputePipeline(LoadShader(@"waste_time"), @"waste_time");
+
+		for (int sharpen_only = 0; sharpen_only < 2; sharpen_only++)
 		{
-			Console.Error("Failed to create spin pipeline: %s", [[err localizedDescription] UTF8String]);
-			return false;
+			setFnConstantB(m_fn_constants, sharpen_only, GSMTLConstantIndex_CAS_SHARPEN_ONLY);
+			NSString* shader = m_dev.features.has_fast_half ? @"CASHalf" : @"CASFloat";
+			m_cas_pipeline[sharpen_only] = MakeComputePipeline(LoadShader(shader), sharpen_only ? @"CAS Sharpen" : @"CAS Upscale");
 		}
 
 		m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]);
diff --git a/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h b/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h
index c0f8c1740ae66..87ca48a6f1e1f 100644
--- a/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h
+++ b/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h
@@ -42,6 +42,7 @@ struct GSMTLDevice
 		bool framebuffer_fetch;
 		bool primid;
 		bool slow_color_compression; ///< Color compression seems to slow down rt read on AMD
+		bool has_fast_half;
 		MetalVersion shader_version;
 		int max_texsize;
 	};
diff --git a/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm b/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm
index 9a8eafef7c9bb..22cb6456be7d3 100644
--- a/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm
+++ b/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm
@@ -152,6 +152,10 @@ static DetectionResult detectIntelGPU(id<MTLDevice> dev, id<MTLLibrary> lib)
 		if ([dev supportsFamily:MTLGPUFamilyApple1])
 			features.framebuffer_fetch = true;
 
+	if (@available(macOS 10.15, iOS 13.0, *))
+		if ([dev supportsFamily:MTLGPUFamilyMac2] || [dev supportsFamily:MTLGPUFamilyApple1])
+			features.has_fast_half = true; // Approximate guess
+
 	features.shader_version = detectLibraryVersion(shaders);
 	if (features.framebuffer_fetch && features.shader_version < MetalVersion::Metal23)
 	{
diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
index 5ff545b8d0fe3..05634115fb6f9 100644
--- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
+++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
@@ -57,6 +57,13 @@ struct GSMTLInterlacePSUniform
 	vector_float4 ZrH;
 };
 
+struct GSMTLCASPSUniform
+{
+	vector_uint4 const0;
+	vector_uint4 const1;
+	vector_int2 srcOffset;
+};
+
 struct GSMTLMainVertex
 {
 	vector_float2 st;
@@ -132,6 +139,7 @@ enum class GSMTLExpandType : unsigned char
 
 enum GSMTLFnConstants
 {
+	GSMTLConstantIndex_CAS_SHARPEN_ONLY,
 	GSMTLConstantIndex_SCALING_FACTOR,
 	GSMTLConstantIndex_FRAMEBUFFER_FETCH,
 	GSMTLConstantIndex_FST,
diff --git a/pcsx2/GS/Renderers/Metal/cas.metal b/pcsx2/GS/Renderers/Metal/cas.metal
new file mode 100644
index 0000000000000..b45c1f6c2afbb
--- /dev/null
+++ b/pcsx2/GS/Renderers/Metal/cas.metal
@@ -0,0 +1,112 @@
+/*  PCSX2 - PS2 Emulator for PCs
+ *  Copyright (C) 2002-2022 PCSX2 Dev Team
+ *
+ *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU Lesser General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with PCSX2.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define A_GPU 1
+#define A_MSL 1
+#define A_HALF 1
+
+#include "../../../../bin/resources/shaders/common/ffx_a.h"
+
+struct CASTextureF
+{
+	const thread texture2d<float, access::read>& tex;
+	uint2 offset;
+};
+
+struct CASTextureH
+{
+	const thread texture2d<half, access::read>& tex;
+	ushort2 offset;
+};
+
+#define CAS_TEXTURE CASTextureF
+#define CAS_TEXTUREH CASTextureH
+
+A_STATIC AF3 CasLoad(CASTextureF tex, ASU2 coord)
+{
+	return tex.tex.read(AU2(coord) + tex.offset).rgb;
+}
+#define CasInput(r,g,b)
+
+A_STATIC AH3 CasLoadH(CASTextureH tex, ASW2 coord)
+{
+	return tex.tex.read(AW2(coord) + tex.offset).rgb;
+}
+
+A_STATIC void CasInputH(inoutAH2 r, inoutAH2 g, inoutAH2 b){}
+
+#include "../../../../bin/resources/shaders/common/ffx_cas.h"
+
+#include "GSMTLShaderCommon.h"
+
+constant bool CAS_SHARPEN_ONLY [[function_constant(GSMTLConstantIndex_CAS_SHARPEN_ONLY)]];
+
+kernel void CASFloat(
+	uint2 localID [[thread_position_in_threadgroup]],
+	uint2 workgroupID [[threadgroup_position_in_grid]],
+	texture2d<float, access::read> input [[texture(0)]],
+	texture2d<float, access::write> output [[texture(1)]],
+	constant GSMTLCASPSUniform& cb [[buffer(GSMTLBufferIndexUniforms)]])
+{
+	// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+	AU2 gxy = ARmp8x8(localID.x) + (workgroupID << 4);
+	const AU4 const0 = cb.const0;
+	const AU4 const1 = cb.const1;
+	const CASTextureF tex{input, AU2(cb.srcOffset)};
+
+	// Filter.
+	float r, g, b;
+
+	CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
+	output.write(float4(r, g, b, 1), gxy);
+	gxy.x += 8;
+
+	CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
+	output.write(float4(r, g, b, 1), gxy);
+	gxy.y += 8;
+
+	CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
+	output.write(float4(r, g, b, 1), gxy);
+	gxy.x -= 8;
+
+	CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
+	output.write(float4(r, g, b, 1), gxy);
+}
+
+kernel void CASHalf(
+	uint2 localID [[thread_position_in_threadgroup]],
+	uint2 workgroupID [[threadgroup_position_in_grid]],
+	texture2d<half, access::read> input [[texture(0)]],
+	texture2d<half, access::write> output [[texture(1)]],
+	constant GSMTLCASPSUniform& cb [[buffer(GSMTLBufferIndexUniforms)]])
+{
+	// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+	AU2 gxy = ARmp8x8(localID.x) + (workgroupID << 4);
+	const AU4 const0 = cb.const0;
+	const AU4 const1 = cb.const1;
+	const CASTextureH tex{input, AW2(cb.srcOffset)};
+
+	// Filter.
+	half2 r, g, b;
+
+	#pragma unroll
+	for (int i = 0; i < 2; i++)
+	{
+		CasFilterH(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
+		output.write(half4(r.x, g.x, b.x, 1), gxy);
+		output.write(half4(r.y, g.y, b.y, 1), gxy + AU2(8, 0));
+		gxy.y += 8;
+	}
+}