Reverse engineering CRF

References:
https://msdn.microsoft.com/en-us/library/windows/desktop/bb509656%28v=vs.85%29.aspx
Specific references:
Mapping texels to pixels https://msdn.microsoft.com/en-us/library/windows/desktop/bb219690%28v=vs.85%29.aspx
Destination register masks https://msdn.microsoft.com/en-us/library/windows/desktop/bb172949%28v=vs.85%29.aspx
How input and output semantics work https://msdn.microsoft.com/en-us/library/windows/desktop/bb944006%28v=vs.85%29.aspx
Learn Vertex & Pixel Shader Programming With DirectX 9 - great examples of shader asm
Shaders for Game Programmers and Artists - discussion about bump map vs normal map
Real Time Rendering - more theoretical description of normal mapping
ShaderX 1 - example of using D3DCOLOR to store normals, tangents and bitangents, example of compressed vertex stream declaration
Real Time Shader Programming - transforming normal vectors
Data Structures and Algorithms for Game Developers - some details about DXT compression and normal maps
Real Time Rendering Tricks and Techniques in DirectX - extensive examples of pixel shaders and bump mapping

cX registers are constants, rX registers are temporary registers

Models that don't have skeletons have the following vertex buffer layout:

/* Specify buffer layout with type/name pairs, e.g. "float3 position;"
   HLSL Base types: bool, byte, short, int, half, float, double
   HLSL Vector Types: float3, vector<uint,3>, float3x3, matrix<xshort,2,2>
   Modifiers for byte/short/int: s=signed, u=unsigned, x=unsigned hex */

/* Layout of one vertex for models without a skeleton.
   Stride: 12 (float3) + 4 + 4 + 4 + 4 + 4 = 32 bytes.
   NOTE(review): the field-to-shader-input mapping noted below is inferred from
   the names here and the dcl_ comments in the USP 45 vertex shader; confirm it
   against the actual vertex declaration. */
float3 position;      /* presumably dcl_position v0 */
ubyte4 normal;        /* presumably dcl_color v4; the shader expands it with x*2-1 */
ubyte4 specular;      /* presumably dcl_color1 v5, which the shader treats as the tangent - TODO confirm */
ushort2 uv0;          /* presumably dcl_texcoord v2 */
ushort2 uv1;          /* presumably dcl_texcoord1 v3 */
/* NOTE(review): the shader remaps both UV sets with x*0.5+0.5, which would fit a
   signed-normalized format (-1..1) better than unsigned shorts - TODO confirm. */
ubyte4 blendweight;   /* presumably dcl_blendweight1 v1; the shader expands it with x*2-1 like a vector */

Vertex shader float constants:
c0 = -0.895	-0.053	-0.443	-633.405
c1 = 0.003	0.992	-0.124	2.008
c2 = 0.446	-0.113	-0.888	-1301.420

c3 = -0.117	0.000	2.340	2943.542
c4 = -3.797	1.701	-0.190	-2779.526
c5 = -0.409	-0.916	-0.020	226.678
c6 = -0.408	-0.913	-0.020	232.955

c10 = 0   0 0 0 

c11 = 0.500, 0.500, 0.500, 0.500
c12 = 0.500, 0.500, 0.500, 0.500

c13 = 1.000	1.000	0.000	0.000 // some kind of scaling factor for diffuse UVs?

The specular constant (defined in the object's material) is passed to the pixel shader in float constant register c8. For example, USP 45 has a specular constant of (0.170, 0.170, 0.170, 0); inspecting the device state at the DrawIndexedPrimitive call shows that value in c8 (see the pixel shader constants below).

Pixel shader float constants:
Register	0	1	2	3
c0 = -6.435822E+08	5E+08	-5.794842E+08	0
c1 = 0.3247058	0.3458823	0.4235294	0
c2 = -636.7696	-18.1452	-1360.013	0.0001511053
c3 = 0.4	0.4	0.35	1
c4 = -686.0895	-39.44279	-1283.386	0.000144461
c5 = 0.32	0.32	0.4	1
c6 = 0.2941177	0.345098	0.6862745	0
c7 = 0.09019608	0.09019608	0.09019608	0
c8 = 0.17022	0.17022	0.17022	0
c9 = 0	0	0	0

Vertex shader for USP 45:

    vs_3_0
    // Vertex shader for USP 45 (shader model 3.0 disassembly with annotations).
    // What the instructions below do:
    //   * expand three D3DCOLOR-packed vectors (v4, v5, v1) to the -1..1 range and
    //     rotate each by the 3x3 part of c0..c2 (outputs o6, o4, o5)
    //   * transform the position by c0..c2 (4x3), then by c3..c6 (4x4) into o0
    //   * remap both UV sets with x*0.5+0.5 (outputs o1, o9)
    //   * output the c0..c2-transformed position (o3) and normalize(position - c7) (o2)
    //   * compute a blend factor in o8.w and copy c9 into o8.xyz
    // NOTE: c7, c8 and c9 are read below but are not listed in the captured
    // vertex shader constant table above.
    // c14 holds the literal constants used for range expansion and for building w = 1
    def c14, 2, -1, 1, 0 // c14.xyzw = {2,-1,1,0}
    // vertex position in register v0
    dcl_position v0
    // blendweight1 in register v1 (used below as a D3DCOLOR-packed vector)
    dcl_blendweight1 v1
    dcl_texcoord v2    // UVs (texture coordinates)
    dcl_texcoord1 v3   // 2nd set of UVs
    dcl_color v4       // normals
    dcl_color1 v5      // tangents
    dcl_position o0    // output transformed vertex position
    dcl_texcoord o1         // output {remapped diffuse UVs in .xy, remapped 2nd set of UVs in .zw}
    dcl_texcoord1 o2.xyz    // output normalize(transformed position - c7)
    dcl_texcoord2 o3.xyz    // output position transformed by c0..c2 (before c3..c6 is applied)
    dcl_texcoord3 o4.xyz    // output transformed tangent
    dcl_texcoord4 o5.xyz    // output transformed v1 ("blendweights"; presumably the bitangent - TODO confirm)
    dcl_texcoord5 o6.xyz    // output transformed normal
    dcl_texcoord6 o7        // set to the constant {1,1,0,0}, not used in ps
    dcl_texcoord8 o8        // .xyz = c9, .w = blend factor computed below (the ps blends its result with them)
    dcl_texcoord9 o9.xy     // output scaled diffuse UVs

    // transform normals
    // mad = multiply add
    // Expand from compressed D3DCOLOR to -1 to 1 range
    mad r0.xyz, v4.zyxw, c14.x, c14.y    // r0.x = normal.z * 2 - 1
                                         // r0.y = normal.y * 2 - 1
                                         // r0.z = normal.x * 2 - 1
                                         // r0.w is not updated due to mask!
    // dp3 = 3 element dot product
    // The transformed normal is not renormalized here, so the 3x3 part of
    // c0,c1,c2 is presumably a pure rotation (orthogonal matrix).
    // If a 3x3 matrix only contains rotations, its transpose = inverse,
    // so another theory is that c0,c1,c2 is only the WorldTransform (transposed) matrix,
    // with one matrix row per constant register.
    // m3x3 r0, c0 (macro)
    dp3 o6.x, r0, c0    // o6.x = r0.x*c0.x  + r0.y*c0.y + r0.z*c0.z
    dp3 o6.y, r0, c1
    dp3 o6.z, r0, c2
    mov r0.w, c14.z     // r0.w = 1 (becomes the w of the transformed position below)

    // transform vertex positions
    mad r1, v0.xyzx, c14.zzzw, c14.wwwz    // r1.x = position.x * 1 + 0
                                           // r1.y = position.y * 1 + 0
                                           // r1.z = position.z * 1 + 0
                                           // r1.w = position.x * 0 + 1
    // dp4 = 4 element dot product
    // Here the vertex must be transformed by the transform matrix, but I am not sure how it's constructed.
    // Maybe c0,c1,c2 is the WorldTransform and c3,c4,c5,c6 is ViewTransform*Projection?
    // m4x3 r1, c0 (macro): three dp4 rows, so the .w column of c0..c2 (translation) is applied too
    dp4 r0.x, r1, c0    // r0.x = r1.x*c0.x + r1.y*c0.y + r1.z*c0.z + r1.w*c0.w
    dp4 r0.z, r1, c2
    dp4 r0.y, r1, c1    
    // m4x4 r0, c3 (macro); r0 = {transformed position, 1}
    dp4 o0.x, r0, c3   // output position = r0 dot product c3
    dp4 o0.y, r0, c4
    dp4 o0.z, r0, c5
    dp4 o0.w, r0, c6
    // blend factor, part 1 (looks like a fog term - TODO confirm)
    dp4 r0.w, r0, c10                    // r0.w = dot({transformed position, 1}, c10)
    mad_sat r0.w, r0.w, c8.x, c8.y       // r0.w = saturate(r0.w * c8.x + c8.y)
    // Expand from compressed D3DCOLOR to -1 to 1 range
    mad r1.xyz, v5.zyxw, c14.x, c14.y    // r1.x = v5.z * 2 - 1
                                         // r1.y = v5.y * 2 - 1
                                         // r1.z = v5.x * 2 - 1
    // transform tangents
    // m3x3 r1, c0 (macro)
    dp3 o4.x, r1, c0
    dp3 o4.y, r1, c1
    dp3 o4.z, r1, c2
    // Expand from compressed D3DCOLOR to -1 to 1 range
    mad r1.xyz, v1.zyxw, c14.x, c14.y    // r1.x = v1.z * 2 - 1
                                         // r1.y = v1.y * 2 - 1
                                         // r1.z = v1.x * 2 - 1

    // transform blendweights (v1 is handled exactly like the normal and tangent,
    // and the ps multiplies o5 by the normal map's second channel, so it is
    // presumably the bitangent - TODO confirm)
    // m3x3 r1, c0 (macro)
    dp3 o5.x, r1, c0
    dp3 o5.y, r1, c1
    dp3 o5.z, r1, c2
    // blend factor, part 2: a term based on the transformed position's y
    mad r1.x, r0.y, c8.z, c8.w           // r1.x = position.y * c8.z + c8.w
    add r1.y, -r0.w, c14.z               // r1.y = 1 - r0.w
    mul_sat r1.x, r1.x, r1.y             // r1.x = saturate(r1.x * r1.y)
    add_sat o8.w, r0.w, -r1.x            // o8.w = saturate(r0.w - r1.x)
    mad o1.zw, v3.xyxy, c12.xyxy, c12    // o1.z (texcoord0.z) = v3.x * 0.5 + 0.5
                                         // o1.w (texcoord0.w) = v3.y * 0.5 + 0.5
    // is adjustment by 0.5 for mapping texels to pixels?
    // (x*0.5+0.5 also remaps a -1..1 range to 0..1)
    mad r1.xy, v2, c11, c11.zwzw    // temp1.x = v2.x * 0.5 + 0.5
                                    // temp1.y = v2.y * 0.5 + 0.5
    mul o9.xy, r1, c13              // o9.x (texcoord9.x) = temp1.x * 1
                                    // o9.y (texcoord9.y) = temp1.y * 1
    mov o1.xy, r1                   // o1.xy (texcoord0.xy, diffuse UVs) = temp1
    add r1.xyz, r0, -c7             // r1 = transformed position - c7
                                    // c7 is either light, camera or most likely eye position
    mov o3.xyz, r0                  // o3 = position transformed by c0..c2
    dp3 r0.x, r1, r1                // r0.x = r1.x*r1.x + r1.y*r1.y + r1.z*r1.z (sum of squares)
    rsq r0.x, r0.x                  // r0.x = 1 / sqrt(r0.x) = 1/d
    mul o2.xyz, r1, r0.x            // o2.x = r1.x * 1/d
                                    // o2.y = r1.y * 1/d
                                    // o2.z = r1.z * 1/d (o2 = normalized r1)
    mov o7, c14.zzww                // o7 = {1,1,0,0}
    mov o8.xyz, c9                  // o8.xyz = c9 (the ps blends its result towards this value)

// approximately 38 instruction slots used

Pixel shader

    ps_3_0
    // Pixel shader for USP 45 (shader model 3.0 disassembly with annotations).
    // Structure of the code below:
    //   * the same block is repeated three times with (c2,c3), (c0,c1), (c4,c5):
    //     position.xyz/falloff.w in the first register, colour.xyz/weight.w in the
    //     second - this looks like three point lights (TODO confirm)
    //   * a normal is rebuilt from two channels of s1 and the interpolated
    //     tangent (v3), v4 and normal (v5) vectors
    //   * each block adds an N.L term and an (N.H)^32 term scaled by c8
    //   * an ambient term interpolates between c7 and c6 using the normal's y
    //   * result = s0 colour * diffuse sum + specular sum, then blended with v6
    def c10, 2, -1, 1, 0.5
    def c11, 0, 32, 0, 0
    dcl_texcoord v0.xy     // non-scaled diffuse UVs
    dcl_texcoord1 v1.xyz   // vs output o2: normalize(position - vs c7)
    dcl_texcoord2 v2.xyz   // vs output o3: position transformed by vs c0..c2
    dcl_texcoord3 v3.xyz   // transformed tangents
    dcl_texcoord4 v4.xyz   // transformed blendweights (presumably the bitangent - TODO confirm)
    dcl_texcoord5 v5.xyz   // transformed normals
    dcl_texcoord8 v6       // vs output o8: .xyz = blend colour, .w = blend factor
    dcl_texcoord9 v7.xy    // scaled diffuse UVs
    // 2d samplers, only two textures are used
    dcl_2d s0              // sampled with v0; supplies the base colour and output alpha
    dcl_2d s1              // sampled with v7; supplies the two normal channels (.w and .y)

    // ---- first block: position c2.xyz, falloff c2.w, colour c3.xyz, weight c3.w ----
    add r0.xyz, c2, -v2          // r0 = c2 - position
    dp3 r0.w, r0, r0             // r0.w = r0.x*r0.x + r0.y*r0.y + r0.z*r0.z
    mov r1.z, c10.z              // r1.z = 1
    mad r1.x, r0.w, -c2.w, r1.z  // r1.x = r0.w * -c2.w + 1
    rsq r0.w, r0.w               // r0.w = 1/sqrt(r0.w)
    max r2.x, r1.x, c11.x        // r2.x = max(r1.x , 0)
    add_sat r1.x, r2.x, r2.x     // r1.x = saturate(2 * r2.x)
    mul r1.xyw, r1.x, c3.xyzz    // r1.x = r1.x*c3.x, r1.y = r1.x*c3.y, r1.w = r1.x*c3.z
    mul r2.xyz, r1.xyww, c8      // r2.xyz = {r1.x, r1.y, r1.w} * c8.xyz (specular constant)
    nrm r3.xyz, v1               // r3 = normalize(v1)
    mad r4.xyz, r0, r0.w, -r3    // r4 = normalize(r0) - r3 (unnormalized half vector)
    mul r0.xyz, r0, r0.w         // r0 = normalize(r0)
    nrm r5.xyz, r4               // r5 = normalize(r4)

    // ---- normal reconstruction ----
    // sample s1 with coordinate v7.xy and store in r4
    // (plain texld does not take an explicit level of detail; that would be texldl/texldb)
    texld r4, v7, s1
    mad r4.xy, r4.wyzw, c10.x, c10.y    // r4.x = r4.w * 2 - 1
                                        // r4.y = r4.y * 2 - 1
    mul r6.xyz, r4.y, v4            // r6 = r4.y * v4
    mad r6.xyz, r4.x, v3, r6        // r6 = r4.x * v3 + r6
    dp2add r0.w, r4, -r4, c10.z     // r0.w = r4.x*-r4.x + r4.y*-r4.y + 1
    rsq r0.w, r0.w                  // r0.w = 1 / sqrt(r0.w)
    rcp r0.w, r0.w                  // r0.w  = sqrt(r0.w), i.e. the third normal component
    mad r4.xyz, r0.w, v5, r6        // r4 = r0.w * v5 + r6 (normal N used below, not renormalized)

    // specular term of the first block
    dp3_sat r0.w, r4, r5            // r0.w = saturate(N . r5)
    pow_sat r2.w, r0.w, c11.y       // r2.w = (r0.w)^32
    mul r2.xyz, r2, r2.w            // r2 = r2 * r2.w
    mul r5.xyz, r2, c3.w            // r5 = r2 * c3.w

    // ---- second block: position c0.xyz, falloff c0.w, colour c1.xyz, weight c1.w ----
    add r6.xyz, c0, -v2             // r6 = c0 - position
    dp3 r0.w, r6, r6                // r0.w = squared length of r6
    rsq r2.w, r0.w                  // r2.w = 1/sqrt(r0.w)
    mad r0.w, r0.w, -c0.w, r1.z     // r0.w = r0.w * -c0.w + 1
    max r3.w, r0.w, c11.x           // r3.w = max(r0.w, 0)
    add_sat r0.w, r3.w, r3.w        // r0.w = saturate(2 * r3.w)
    mul r7.xyz, r0.w, c1            // r7 = r0.w * c1
    mad r8.xyz, r6, r2.w, -r3       // r8 = normalize(r6) - r3 (unnormalized half vector)
    mul r6.xyz, r6, r2.w            // r6 = normalize(r6)
    dp3_sat r0.w, r4, r6            // r0.w = saturate(N . r6)
    mul r6.xyz, r7, r0.w            // r6 = r7 * r0.w (diffuse term of this block)
    mul r7.xyz, r7, c8              // r7 = r7 * c8 (specular constant)
    nrm r9.xyz, r8                  // r9.xyz = r8 / |r8| (normalized r8)
    dp3_sat r0.w, r4, r9            // r0.w = saturate(N . r9)
    pow_sat r2.w, r0.w, c11.y       // r2.w = (r0.w)^32
    mul r7.xyz, r7, r2.w            // r7 = r7 * r2.w (specular term of this block)
    mad r5.xyz, r7, c1.w, r5        // r5 = r7 * c1.w + r5

    // ---- third block: position c4.xyz, falloff c4.w, colour c5.xyz, weight c5.w ----
    add r8.xyz, c4, -v2             // r8 = c4 - position
    dp3 r0.w, r8, r8                // r0.w = squared length of r8
    rsq r2.w, r0.w                  // r2.w = 1/sqrt(r0.w)
    mad r0.w, r0.w, -c4.w, r1.z     // r0.w = r0.w * -c4.w + 1
    max r3.w, r0.w, c11.x           // r3.w = max(r0.w, 0)
    add_sat r0.w, r3.w, r3.w        // r0.w = saturate(2 * r3.w)
    mul r9.xyz, r0.w, c5            // r9 = r0.w * c5
    mad r3.xyz, r8, r2.w, -r3       // r3 = normalize(r8) - r3 (unnormalized half vector)
    mul r8.xyz, r8, r2.w            // r8 = normalize(r8)
    dp3_sat r0.w, r4, r8            // r0.w = saturate(N . r8)
    mul r8.xyz, r9, r0.w            // r8 = r9 * r0.w (diffuse term of this block)
    mul r9.xyz, r9, c8              // r9 = r9 * c8 (specular constant)
    nrm r10.xyz, r3                 // r10 = normalize(r3)
    dp3_sat r0.w, r4, r10           // r0.w = saturate(N . r10)
    pow_sat r2.w, r0.w, c11.y       // r2.w = r0.w^32
    mul r3.xyz, r9, r2.w            // r3 = r9 * r2.w (specular term of this block)
    mad r5.xyz, r3, c5.w, r5        // r5 = r3 * c5.w + r5

    // ---- combine the three specular terms into r2 ----
    add r0.w, r1.z, -c3.w           // r0.w = 1 - c3.w
    mul r2.xyz, r2, r0.w            // r2 = r2 * (1 - c3.w)
    add r2.w, r1.z, -c1.w           // r2.w = 1 - c1.w
    mad r2.xyz, r7, r2.w, r2        // r2 = r7 * (1 - c1.w) + r2
    add r1.z, r1.z, -c5.w           // r1.z = 1 - c5.w
    mad r2.xyz, r3, r1.z, r2        // r2 = r3 * (1 - c5.w) + r2
    add r2.xyz, r2, r2              // r2 = 2 * r2
    mad r2.xyz, r5, c10.x, r2       // r2.x = r5.x * 2 + r2.x
                                    // r2.y = r5.y * 2 + r2.y
                                    // r2.z = r5.z * 2 + r2.z

    // ---- combine the three diffuse terms (colour is kept in r0.xzw / r1.xyw) ----
    dp3_sat r0.x, r4, r0            // r0.x = saturate(N . r0), r0 = first block's direction
    mad r0.y, r4.y, c10.w, c10.w    // r0.y = r4.y * 0.5 + 0.5
    mul r1.xyw, r1, r0.x            // r1.xyw = r1.xyw * r0.x (diffuse term of the first block)
    mul r0.xzw, r0.w, r1.xyyw       // r0.x = r0.w*r1.x, r0.z = r0.w*r1.y, r0.w = r0.w*r1.w (r0.w = 1 - c3.w)
    mul r1.xyw, r1, c3.w            // r1.xyw = r1.xyw * c3.w
    mad r1.xyw, r6.xyzz, c1.w, r1   // r1.x += r6.x*c1.w, r1.y += r6.y*c1.w, r1.w += r6.z*c1.w
    mad r0.xzw, r6.xyyz, r2.w, r0   // r0.x += r6.x*r2.w, r0.z += r6.y*r2.w, r0.w += r6.z*r2.w (r2.w = 1 - c1.w)
    mad r0.xzw, r8.xyyz, r1.z, r0   // r0.x += r8.x*r1.z, r0.z += r8.y*r1.z, r0.w += r8.z*r1.z (r1.z = 1 - c5.w)
    mad r1.xyz, r8, c5.w, r1.xyww   // r1.xyz = r8.xyz * c5.w + {r1.x, r1.y, r1.w}
    add r0.xzw, r0, r0              // r0.xzw = 2 * r0.xzw
    mad r0.xzw, r1.xyyz, c10.x, r0  // r0.x = r1.x * 2 + r0.x
                                    // r0.z = r1.y * 2 + r0.z
                                    // r0.w = r1.z * 2 + r0.w

    // ---- ambient term: interpolate from c7 to c6 by r0.y ----
    mov r1.xyz, c7
    add r1.xyz, -r1, c6             // r1 = c6 - c7
    mad r1.xyz, r0.y, r1, c7        // r1 = r0.y * (c6 - c7) + c7
    add r0.xyz, r0.xzww, r1         // r0.xyz = {r0.x, r0.z, r0.w} + r1
    mov r0.w, c8.w
    mad r0.xyz, c9, r0.w, r0        // r0 = c9 * c8.w + r0

    // sample s0 with coordinate v0 and store in r1
    texld r1, v0, s0
    mad r0.xyz, r1, r0, r2          // r0 = s0 colour * diffuse sum + specular sum
    mov oC0.w, r1.w                 // output alpha = s0 alpha
    add r0.xyz, r0, -v6             // r0 = r0 - v6.xyz
    // this sets the final output color:
    // oC0.xyz = v6.xyz + v6.w * (colour - v6.xyz), a linear blend from v6.xyz to the lit colour
    mad oC0.xyz, v6.w, r0, v6

// approximately 104 instruction slots used (2 texture, 102 arithmetic)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Reverse engineering CRF

Clone this wiki locally