-
Notifications
You must be signed in to change notification settings - Fork 7
Reverse engineering CRF
References:
https://msdn.microsoft.com/en-us/library/windows/desktop/bb509656%28v=vs.85%29.aspx
Specific references:
Mapping texels to pixels https://msdn.microsoft.com/en-us/library/windows/desktop/bb219690%28v=vs.85%29.aspx
Destination register masks https://msdn.microsoft.com/en-us/library/windows/desktop/bb172949%28v=vs.85%29.aspx
How input and output semantics work https://msdn.microsoft.com/en-us/library/windows/desktop/bb944006%28v=vs.85%29.aspx
Learn Vertex & Pixel Shader Programming With DirectX 9 - great examples of shader asm
Shaders for Game Programmers and Artists - discussion about bump map vs normal map
Real Time Rendering - more theoretical description of normal mapping
ShaderX 1 - example of using D3DCOLOR to store normals, tangents and bitangents, example of compressed vertex stream declaration
Real Time Shader Programming - transforming normal vectors
Data Structures and Algorithms for Game Developers - some details about DXT compression and normal maps
Real Time Rendering Tricks and Techniques in DirectX - extensive examples of pixel shaders and bump mapping
cX registers are constants, rX registers are temporary registers
Models that don't have skeletons have the following vertex buffer layout:
/* Specify buffer layout with type/name pairs, e.g. "float3 position;"
HLSL Base types: bool, byte, short, int, half, float, double
HLSL Vector Types: float3, vector<uint,3>, float3x3, matrix<xshort,2,2>
Modifiers for byte/short/int: s=signed, u=unsigned, x=unsigned hex */
float3 position;
ubyte4 normal;
ubyte4 specular;
ushort2 uv0;
ushort2 uv1;
ubyte4 blendweight;
Vertex shader float constants:
c0 = -0.895 -0.053 -0.443 -633.405
c1 = 0.003 0.992 -0.124 2.008
c2 = 0.446 -0.113 -0.888 -1301.420
c3 = -0.117 0.000 2.340 2943.542
c4 = -3.797 1.701 -0.190 -2779.526
c5 = -0.409 -0.916 -0.020 226.678
c6 = -0.408 -0.913 -0.020 232.955
c10 = 0 0 0 0
c11 = 0.500, 0.500, 0.500, 0.500
c12 = 0.500, 0.500, 0.500, 0.500
c13 = 1.000 1.000 0.000 0.000 // some kind of scaling factor for diffuse UVs?
The specular constant (defined in object materials) is passed through pixel shader float constant register c8. For example, the USP 45 has a specular constant of (0.170, 0.170, 0.170, 0); inspecting the device state at the call to DrawIndexedPrimitive shows that constant in that register.
Pixel shader float constants:
Register 0 1 2 3
c0 = -6.435822E+08 5E+08 -5.794842E+08 0
c1 = 0.3247058 0.3458823 0.4235294 0
c2 = -636.7696 -18.1452 -1360.013 0.0001511053
c3 = 0.4 0.4 0.35 1
c4 = -686.0895 -39.44279 -1283.386 0.000144461
c5 = 0.32 0.32 0.4 1
c6 = 0.2941177 0.345098 0.6862745 0
c7 = 0.09019608 0.09019608 0.09019608 0
c8 = 0.17022 0.17022 0.17022 0
c9 = 0 0 0 0
Vertex shader for USP 45:
vs_3_0
// Vertex shader summary:
//   * expands the packed normal (v4), tangent (v5) and blendweight1 (v1) inputs from 0..1 to -1..1
//     and rotates each with the 3x3 part of c0..c2
//   * transforms the position with c0..c2 (4x3) and then with c3..c6 (4x4) to produce o0
//   * remaps both UV sets with c11/c12 and scales the first set with c13
//   * outputs a normalized (position - c7) vector and a saturated blend factor in o8.w
// c14 holds the literals used below: 2 and -1 for unpacking, 1 and 0 for building w components
def c14, 2, -1, 1, 0 // c14.xyzw = {2,-1,1,0}
// vertex position in register v0
dcl_position v0
// blendweight1 in register v1 (unpacked and rotated like the normal/tangent below,
// so it presumably carries the bitangent rather than skinning weights - TODO confirm)
dcl_blendweight1 v1
dcl_texcoord v2 // UVs (texture coordinates)
dcl_texcoord1 v3 // 2nd set of UVs
dcl_color v4 // normals (packed, read with a .zyx swizzle)
dcl_color1 v5 // tangents (packed, read with a .zyx swizzle)
dcl_position o0 // output transformed vertex position
dcl_texcoord o1 // output {non-scaled diffuse UVs in .xy, non-scaled 2nd set of UVs in .zw}
dcl_texcoord1 o2.xyz // normalize(transformed position - c7)
dcl_texcoord2 o3.xyz // position after the c0..c2 transform (presumably world-space position)
dcl_texcoord3 o4.xyz // output transformed tangent
dcl_texcoord4 o5.xyz // output transformed blendweight1 vector
dcl_texcoord5 o6.xyz // output transformed normal
dcl_texcoord6 o7 // constant (1,1,0,0); texcoord6 is not declared by the pixel shader
dcl_texcoord8 o8 // .xyz = c9, .w = saturated blend factor computed below (presumably fog colour/factor)
dcl_texcoord9 o9.xy // output scaled diffuse UVs
// ---- transform normals ----
// mad = multiply add
// Expand from compressed D3DCOLOR (0..1) to the -1..1 range
mad r0.xyz, v4.zyxw, c14.x, c14.y // r0.x = normal.z * 2 - 1
// r0.y = normal.y * 2 - 1
// r0.z = normal.x * 2 - 1
// r0.w is not updated due to the .xyz write mask
// dp3 = 3 element dot product
// The rotated normal is not renormalized here, which suggests the 3x3 part of c0..c2 is a pure
// rotation (for a rotation-only matrix the transpose equals the inverse).
// Theory: c0,c1,c2 are the rows of the world transform rather than WorldViewProjection,
// since only three rows are used.
// Equivalent macro: m3x3 o6.xyz, r0, c0
dp3 o6.x, r0, c0 // o6.x = r0.x*c0.x + r0.y*c0.y + r0.z*c0.z
dp3 o6.y, r0, c1
dp3 o6.z, r0, c2
mov r0.w, c14.z // r0.w = 1, so r0 becomes (x', y', z', 1) once r0.xyz is overwritten below
// ---- transform vertex position ----
mad r1, v0.xyzx, c14.zzzw, c14.wwwz // r1.x = position.x * 1 + 0
// r1.y = position.y * 1 + 0
// r1.z = position.z * 1 + 0
// r1.w = position.x * 0 + 1 (forces w = 1)
// dp4 = 4 element dot product
// First stage: three dp4 against c0,c1,c2, i.e. a 4x3 transform (rotation + translation in .w).
// Theory: c0,c1,c2 is the world transform and c3,c4,c5,c6 is View*Projection. The result is also
// written to o3 and the pixel shader subtracts it from c0/c2/c4, which fits a world-space position.
// Equivalent macro: m4x3 r0.xyz, r1, c0 (note the y and z rows are issued out of order)
dp4 r0.x, r1, c0 // r0.x = r1.x*c0.x + r1.y*c0.y + r1.z*c0.z + r1.w*c0.w
dp4 r0.z, r1, c2
dp4 r0.y, r1, c1
// Second stage, equivalent macro: m4x4 o0, r0, c3
dp4 o0.x, r0, c3 // output position.x = r0 dot c3
dp4 o0.y, r0, c4
dp4 o0.z, r0, c5
dp4 o0.w, r0, c6
// ---- blend factor, part 1 (c8 is not in the constant dump above; meaning unconfirmed) ----
dp4 r0.w, r0, c10 // r0.w = (r0.xyz, 1) dot c10
mad_sat r0.w, r0.w, c8.x, c8.y // r0.w = saturate(r0.w * c8.x + c8.y)
// ---- transform tangents ----
// Expand from compressed D3DCOLOR (0..1) to the -1..1 range
mad r1.xyz, v5.zyxw, c14.x, c14.y // r1.x = v5.z * 2 - 1
// r1.y = v5.y * 2 - 1
// r1.z = v5.x * 2 - 1
// Equivalent macro: m3x3 o4.xyz, r1, c0
dp3 o4.x, r1, c0
dp3 o4.y, r1, c1
dp3 o4.z, r1, c2
// ---- transform blendweight1 vector ----
// Expand from compressed D3DCOLOR (0..1) to the -1..1 range
mad r1.xyz, v1.zyxw, c14.x, c14.y // r1.x = v1.z * 2 - 1
// r1.y = v1.y * 2 - 1
// r1.z = v1.x * 2 - 1
// Equivalent macro: m3x3 o5.xyz, r1, c0
dp3 o5.x, r1, c0
dp3 o5.y, r1, c1
dp3 o5.z, r1, c2
// ---- blend factor, part 2 ----
mad r1.x, r0.y, c8.z, c8.w // r1.x = transformed position.y * c8.z + c8.w
add r1.y, -r0.w, c14.z // r1.y = 1 - r0.w
mul_sat r1.x, r1.x, r1.y // r1.x = saturate(r1.x * (1 - r0.w))
add_sat o8.w, r0.w, -r1.x // o8.w = saturate(r0.w - r1.x)
// ---- texture coordinates ----
mad o1.zw, v3.xyxy, c12.xyxy, c12 // o1.z = v3.x * c12.x + c12.z (= v3.x * 0.5 + 0.5 with the dumped c12)
// o1.w = v3.y * c12.y + c12.w (= v3.y * 0.5 + 0.5 with the dumped c12)
// x * 0.5 + 0.5 maps -1..1 onto 0..1; open question from the original notes: is the 0.5
// adjustment related to mapping texels to pixels?
mad r1.xy, v2, c11, c11.zwzw // r1.x = v2.x * c11.x + c11.z (= v2.x * 0.5 + 0.5 with the dumped c11)
// r1.y = v2.y * c11.y + c11.w (= v2.y * 0.5 + 0.5 with the dumped c11)
mul o9.xy, r1, c13 // o9.x = r1.x * c13.x (c13.x = 1 in the dump)
// o9.y = r1.y * c13.y (c13.y = 1 in the dump)
mov o1.xy, r1 // o1.xy = remapped, unscaled diffuse UVs
// ---- normalized (position - c7) vector ----
add r1.xyz, r0, -c7 // r1 = transformed position - c7; c7 is either light, camera or most likely eye position
mov o3.xyz, r0 // o3 = transformed position (r0.x is overwritten right after this)
dp3 r0.x, r1, r1 // r0.x = r1.x*r1.x + r1.y*r1.y + r1.z*r1.z (squared length)
rsq r0.x, r0.x // r0.x = 1 / sqrt(r0.x) = 1/d
mul o2.xyz, r1, r0.x // o2.x = r1.x * 1/d
// o2.y = r1.y * 1/d
// o2.z = r1.z * 1/d (o2 = r1 normalized)
mov o7, c14.zzww // o7 = (1, 1, 0, 0)
mov o8.xyz, c9 // o8.xyz = c9 (blended with the lit colour by o8.w in the pixel shader)
// approximately 38 instruction slots used
Pixel shader
ps_3_0
// Pixel shader summary:
//   * builds a perturbed normal (r4) from the two-channel map in s1 and the interpolated
//     tangent (v3), blendweight1 vector (v4) and normal (v5)
//   * evaluates three point-light-like terms with the same pattern, using constant pairs
//     (c2,c3), (c0,c1) and (c4,c5): the first constant's .xyz is subtracted from v2 and its .w
//     scales the squared distance, the second constant's .xyz is the colour and its .w a blend weight
//   * adds an ambient term interpolated between c7 and c6, multiplies by the s0 texture,
//     adds the specular sum and finally blends with v6.xyz by v6.w
// Names such as "light", "specular" and "ambient" are interpretations of the arithmetic - TODO confirm.
def c10, 2, -1, 1, 0.5
def c11, 0, 32, 0, 0
dcl_texcoord v0.xy // non-scaled diffuse UVs
dcl_texcoord1 v1.xyz // normalize(position - c7) from the vertex shader (renormalized below)
dcl_texcoord2 v2.xyz // transformed position from the vertex shader
dcl_texcoord3 v3.xyz // transformed tangents
dcl_texcoord4 v4.xyz // transformed blendweight1 vector
dcl_texcoord5 v5.xyz // transformed normals
dcl_texcoord8 v6 // .xyz = colour to blend towards, .w = blend factor (presumably fog)
dcl_texcoord9 v7.xy // scaled diffuse UVs
// 2d samplers, only two textures are used
dcl_2d s0
dcl_2d s1
// ---- term 1 (c2, c3): vector to c2 and attenuation ----
add r0.xyz, c2, -v2 // r0 = c2.xyz - position
dp3 r0.w, r0, r0 // r0.w = r0.x*r0.x + r0.y*r0.y + r0.z*r0.z (squared distance)
mov r1.z, c10.z // r1.z = 1 (kept as the constant 1 until it is overwritten near the end)
mad r1.x, r0.w, -c2.w, r1.z // r1.x = r0.w * -c2.w + 1
rsq r0.w, r0.w // r0.w = 1/sqrt(r0.w)
max r2.x, r1.x, c11.x // r2.x = max(r1.x , 0)
add_sat r1.x, r2.x, r2.x // r1.x = saturate(2 * r2.x)
mul r1.xyw, r1.x, c3.xyzz // r1.x = r1.x*c3.x, r1.y = r1.x*c3.y, r1.w = r1.x*c3.z (attenuated colour)
mul r2.xyz, r1.xyww, c8 // r2 = attenuated colour * c8.xyz (c8 = material specular constant)
nrm r3.xyz, v1 // r3 = normalize(v1)
mad r4.xyz, r0, r0.w, -r3 // r4 = normalized r0 - r3
mul r0.xyz, r0, r0.w // r0.xyz = normalized r0
nrm r5.xyz, r4 // r5 = normalize(r4) (presumably the half vector for term 1)
// ---- perturbed normal ----
// Sample s1 with coordinate v7.xy and store in r4. Plain texld takes no explicit LOD from the
// coordinate's w component (that is texldl), and v7 is only declared with .xy.
texld r4, v7, s1
mad r4.xy, r4.wyzw, c10.x, c10.y // r4.x = r4.w * 2 - 1
// r4.y = r4.y * 2 - 1
mul r6.xyz, r4.y, v4 // r6 = r4.y * v4
mad r6.xyz, r4.x, v3, r6 // r6 = r4.x * v3 + r4.y * v4
dp2add r0.w, r4, -r4, c10.z // r0.w = r4.x*-r4.x + r4.y*-r4.y + 1 = 1 - x*x - y*y
rsq r0.w, r0.w // r0.w = 1 / sqrt(r0.w)
rcp r0.w, r0.w // r0.w = 1 / r0.w, i.e. sqrt(1 - x*x - y*y)
mad r4.xyz, r0.w, v5, r6 // r4 = r0.w * v5 + r6 (perturbed normal, not renormalized)
// ---- term 1: specular ----
dp3_sat r0.w, r4, r5 // r0.w = saturate(r4 . r5)
pow_sat r2.w, r0.w, c11.y // r2.w = (r0.w)^32
mul r2.xyz, r2, r2.w // r2 = r2 * r2.w
mul r5.xyz, r2, c3.w // r5 = r2 * c3.w (start of the c*.w-weighted specular sum)
// ---- term 2 (c0, c1) ----
add r6.xyz, c0, -v2 // r6 = c0.xyz - position
dp3 r0.w, r6, r6 // r0.w = squared distance
rsq r2.w, r0.w // r2.w = 1/sqrt(r0.w)
mad r0.w, r0.w, -c0.w, r1.z // r0.w = r0.w * -c0.w + 1
max r3.w, r0.w, c11.x // r3.w = max(r0.w, 0)
add_sat r0.w, r3.w, r3.w // r0.w = saturate(2 * r3.w)
mul r7.xyz, r0.w, c1 // r7 = r0.w * c1.xyz (attenuated colour)
mad r8.xyz, r6, r2.w, -r3 // r8 = normalized r6 - r3
mul r6.xyz, r6, r2.w // r6 = normalized r6
dp3_sat r0.w, r4, r6 // r0.w = saturate(r4 . r6)
mul r6.xyz, r7, r0.w // r6 = r7 * r0.w (diffuse contribution of term 2)
mul r7.xyz, r7, c8 // r7 = r7 * c8.xyz
nrm r9.xyz, r8 // r9.xyz = normalize(r8)
dp3_sat r0.w, r4, r9 // r0.w = saturate(r4 . r9)
pow_sat r2.w, r0.w, c11.y // r2.w = (r0.w)^32
mul r7.xyz, r7, r2.w // r7 = r7 * r2.w (specular contribution of term 2)
mad r5.xyz, r7, c1.w, r5 // r5 = r7 * c1.w + r5
// ---- term 3 (c4, c5) ----
add r8.xyz, c4, -v2 // r8 = c4.xyz - position
dp3 r0.w, r8, r8 // r0.w = squared distance
rsq r2.w, r0.w // r2.w = 1/sqrt(r0.w)
mad r0.w, r0.w, -c4.w, r1.z // r0.w = r0.w * -c4.w + 1
max r3.w, r0.w, c11.x // r3.w = max(r0.w, 0)
add_sat r0.w, r3.w, r3.w // r0.w = saturate(2 * r3.w)
mul r9.xyz, r0.w, c5 // r9 = r0.w * c5.xyz (attenuated colour)
mad r3.xyz, r8, r2.w, -r3 // r3 = normalized r8 - r3 (overwrites the normalized v1)
mul r8.xyz, r8, r2.w // r8 = normalized r8
dp3_sat r0.w, r4, r8 // r0.w = saturate(r4 . r8)
mul r8.xyz, r9, r0.w // r8 = r9 * r0.w (diffuse contribution of term 3)
mul r9.xyz, r9, c8 // r9 = r9 * c8.xyz
nrm r10.xyz, r3 // r10.xyz = normalize(r3)
dp3_sat r0.w, r4, r10 // r0.w = saturate(r4 . r10)
pow_sat r2.w, r0.w, c11.y // r2.w = r0.w^32
mul r3.xyz, r9, r2.w // r3 = r9 * r2.w (specular contribution of term 3)
mad r5.xyz, r3, c5.w, r5 // r5 = r3 * c5.w + r5
// ---- combine specular: r2 = 2 * sum(spec_i * (1 - w_i)) + 2 * sum(spec_i * w_i), w = c3.w, c1.w, c5.w ----
add r0.w, r1.z, -c3.w // r0.w = 1 - c3.w
mul r2.xyz, r2, r0.w // r2 = r2 * (1 - c3.w)
add r2.w, r1.z, -c1.w // r2.w = 1 - c1.w
mad r2.xyz, r7, r2.w, r2 // r2 = r7 * (1 - c1.w) + r2
add r1.z, r1.z, -c5.w // r1.z = 1 - c5.w (r1.z no longer holds 1 after this)
mad r2.xyz, r3, r1.z, r2 // r2 = r3 * (1 - c5.w) + r2
add r2.xyz, r2, r2 // r2 = 2 * r2
mad r2.xyz, r5, c10.x, r2 // r2.x = r5.x * 2 + r2.x
// r2.y = r5.y * 2 + r2.y
// r2.z = r5.z * 2 + r2.z
// ---- combine diffuse (kept in r0.xzw because r0.y is used for the ambient blend factor) ----
dp3_sat r0.x, r4, r0 // r0.x = saturate(r4 . normalized r0) for term 1
mad r0.y, r4.y, c10.w, c10.w // r0.y = r4.y * 0.5 + 0.5
mul r1.xyw, r1, r0.x // r1.xyw = term 1 attenuated colour * r0.x
mul r0.xzw, r0.w, r1.xyyw // r0.x = r0.w*r1.x, r0.z = r0.w*r1.y, r0.w = r0.w*r1.w (r0.w was 1 - c3.w)
mul r1.xyw, r1, c3.w // r1.xyw = r1.xyw * c3.w
mad r1.xyw, r6.xyzz, c1.w, r1 // r1.x += r6.x*c1.w, r1.y += r6.y*c1.w, r1.w += r6.z*c1.w
mad r0.xzw, r6.xyyz, r2.w, r0 // r0.x += r6.x*r2.w, r0.z += r6.y*r2.w, r0.w += r6.z*r2.w (r2.w = 1 - c1.w)
mad r0.xzw, r8.xyyz, r1.z, r0 // r0.x += r8.x*r1.z, r0.z += r8.y*r1.z, r0.w += r8.z*r1.z (r1.z = 1 - c5.w)
mad r1.xyz, r8, c5.w, r1.xyww // r1.x = r8.x*c5.w + r1.x, r1.y = r8.y*c5.w + r1.y, r1.z = r8.z*c5.w + r1.w
add r0.xzw, r0, r0 // r0.xzw = 2 * r0.xzw
mad r0.xzw, r1.xyyz, c10.x, r0 // r0.x = r1.x * 2 + r0.x
// r0.z = r1.y * 2 + r0.z
// r0.w = r1.z * 2 + r0.w
// ---- ambient: r1 = c7 + r0.y * (c6 - c7), blended by the perturbed normal's y component ----
mov r1.xyz, c7
add r1.xyz, -r1, c6 // r1 = c6 - c7
mad r1.xyz, r0.y, r1, c7 // r1 = r0.y * (c6 - c7) + c7
add r0.xyz, r0.xzww, r1 // r0.x = r0.x + r1.x, r0.y = r0.z + r1.y, r0.z = r0.w + r1.z
mov r0.w, c8.w // r0.w = c8.w
mad r0.xyz, c9, r0.w, r0 // r0 = c9 * c8.w + r0
// sample s0 with coordinate v0 and store in r1
texld r1, v0, s0
mad r0.xyz, r1, r0, r2 // r0 = texture colour * diffuse/ambient sum + specular sum
mov oC0.w, r1.w // output alpha = texture alpha
add r0.xyz, r0, -v6 // r0 = r0 - v6.xyz
// this sets the final output color: oC0.xyz = v6.xyz + v6.w * (colour - v6.xyz)
mad oC0.xyz, v6.w, r0, v6
// approximately 104 instruction slots used (2 texture, 102 arithmetic)