Reverse engineering CRF
References:
https://msdn.microsoft.com/en-us/library/windows/desktop/bb509656%28v=vs.85%29.aspx
Specific references:
Mapping texels to pixels: https://msdn.microsoft.com/en-us/library/windows/desktop/bb219690%28v=vs.85%29.aspx
Destination register masks: https://msdn.microsoft.com/en-us/library/windows/desktop/bb172949%28v=vs.85%29.aspx
How input and output semantics work: https://msdn.microsoft.com/en-us/library/windows/desktop/bb944006%28v=vs.85%29.aspx
D3D types: https://msdn.microsoft.com/en-us/library/windows/desktop/bb172533%28v=vs.85%29.aspx
D3DVERTEXELEMENT9 structure: https://msdn.microsoft.com/en-us/library/windows/desktop/bb172630%28v=vs.85%29.aspx
Learn Vertex & Pixel Shader Programming With DirectX 9 - great examples of shader asm
Shaders for Game Programmers and Artists - discussion about bump map vs normal map
Real Time Rendering - more theoretical description of normal mapping
ShaderX 1 - example of using D3DCOLOR to store normals, tangents and bitangents, example of compressed vertex stream declaration
Real-Time Shader Programming - transforming normal vectors
Data Structures and Algorithms for Game Developers - some details about DXT compression and normal maps
Real Time Rendering Tricks and Techniques in DirectX - extensive examples of pixel shaders and bump mapping
cX registers are constants, rX registers are temporary registers
Models that don't have skeletons have the following vertex buffer layout:
Stream | Offset | Type | Method | Usage | UsageIndex | Remark |
---|---|---|---|---|---|---|
0 | 0 | FLOAT3 | DEFAULT | POSITION | 0 | Vertex position |
0 | 12 | D3DCOLOR | DEFAULT | COLOR | 0 | Normal |
0 | 16 | D3DCOLOR | DEFAULT | COLOR | 1 | Unknown (Tangents?) |
0 | 20 | SHORT2N | DEFAULT | TEXCOORD | 0 | Texture coordinates |
0 | 24 | SHORT2N | DEFAULT | TEXCOORD | 1 | Unknown |
0 | 28 | D3DCOLOR | DEFAULT | BLENDWEIGHT | 1 | Unknown |
D3DDECL_END |
/* Specify buffer layout with type/name pairs, e.g. "float3 position;"
HLSL Base types: bool, byte, short, int, half, float, double
HLSL Vector Types: float3, vector<uint,3>, float3x3, matrix<xshort,2,2>
Modifiers for byte/short/int: s=signed, u=unsigned, x=unsigned hex */
float3 position;
ubyte4 normal;
ubyte4 tangent;
ushort2 uv0;
ushort2 uv1;
ubyte4 blendweight;
Vertex shader float constant registers:
Register | Remark | ||||
---|---|---|---|---|---|
0 | -0.895 | -0.053 | -0.443 | -633.405 | World Transform |
1 | 0.003 | 0.992 | -0.124 | 2.008 | World Transform |
2 | 0.446 | -0.113 | -0.888 | -1301.420 | World Transform |
3 | -0.117 | 0.000 | 2.340 | 2943.542 | View Transform? |
4 | -3.797 | 1.701 | -0.190 | -2779.526 | View Transform? |
5 | -0.409 | -0.916 | -0.020 | 226.678 | View Transform? |
6 | -0.408 | -0.913 | -0.020 | 232.955 | View Transform? |
7 | -450.501 | 485.219 | -1280.439 | 0.000 | Eye, camera or light position? (the vertex shader subtracts it from the transformed vertex position) |
8 | -0.001 | 1.490 | -0.009 | 0.909 | |
9 | 0.173 | 0.212 | 0.365 | 0.000 | Eye, camera or light direction? |
10 | -0.408 | -0.913 | -0.020 | 232.955 | |
11 | 0.500 | 0.500 | 0.500 | 0.500 | |
12 | 0.500 | 0.500 | 0.500 | 0.500 | |
13 | 1.000 | 1.000 | 0.000 | 0.000 | some kind of scaling factor for diffuse UVs? |
14 | 2.000 | -1.000 | 1.000 | 0.000 | Set in vertex shader |
The specular constant (defined in object materials) is passed through pixel shader float constant register c8. For example, the USP 45 has a specular constant of (0.170, 0.170, 0.170, 0); looking at the device state at the call to DrawIndexedPrimitive, that constant is in that register.
Pixel shader float constant registers:
Register | Remark | ||||
---|---|---|---|---|---|
0 | -643582208.000 | 500000000.000 | -579484160.000 | 0.000 | |
1 | 0.325 | 0.346 | 0.424 | 0.000 | |
2 | -661.576 | -19.550 | -1353.910 | 0.000 | |
3 | 0.400 | 0.400 | 0.350 | 1.000 | |
4 | -671.998 | -40.875 | -1263.387 | 0.000 | |
5 | 0.320 | 0.320 | 0.400 | 1.000 | |
6 | 0.294 | 0.345 | 0.686 | 0.000 | |
7 | 0.090 | 0.090 | 0.090 | 0.000 | |
8 | 0.170 | 0.170 | 0.170 | 0.000 | specular constant? |
9 | 0.000 | 0.000 | 0.000 | 0.000 | |
10 | 0.090 | 0.090 | 0.090 | 0.000 | |
11 | 3.000 | 15.000 | 0.100 | 0.200 | |
12 | 0.120 | 0.300 | 0.900 | 0.000 | |
13 | 0.000 | 0.000 | 0.000 | 0.000 |
Vertex shader:
// Vertex shader used for models without a skeleton.
// Summary of what the instructions below compute:
//   o0       = vertex position transformed by c0..c2 and then by c3..c6
//   o1.xy    = UV set 0 remapped with c11, o1.zw = UV set 1 remapped with c12
//   o2.xyz   = normalize(transformed position - c7)
//   o3.xyz   = vertex position transformed by c0..c2
//   o4/o5/o6 = v5 / v1 / v4 expanded to [-1, 1] and multiplied by the 3x3 part of c0..c2
//   o7       = {1, 1, 0, 0}
//   o8.xyz   = c9, o8.w = saturated factor built from c8 and c10 (presumably fog - TODO confirm)
//   o9.xy    = remapped UV set 0 scaled by c13
vs_3_0
// c14 is a constant
def c14, 2, -1, 1, 0 // c14.xyzw = {2,-1,1,0}
dcl_position v0 // vertex position in register v0
dcl_blendweight1 v1 // blendweight1 in register v1 (packed vector, presumably the bitangent - TODO confirm)
dcl_texcoord v2 // UVs (texture coordinates)
dcl_texcoord1 v3 // 2nd set of UVs?
dcl_color v4 // normals
dcl_color1 v5 // tangents?
dcl_position o0 // output transformed vertex position
dcl_texcoord o1 // output {remapped diffuse UVs in xy, remapped 2nd set of UVs in zw}
dcl_texcoord1 o2.xyz // output normalized (transformed position - c7)
dcl_texcoord2 o3.xyz // output vertex position transformed by c0..c2
dcl_texcoord3 o4.xyz // output transformed tangent
dcl_texcoord4 o5.xyz // output transformed blendweights
dcl_texcoord5 o6.xyz // output transformed normal
dcl_texcoord6 o7 // some kind of constant, not used in ps
dcl_texcoord8 o8 // xyz = c9, w = factor computed from c8/c10 (read by the pixel shader as texcoord8)
dcl_texcoord9 o9.xy // output scaled diffuse UVs
// transform normals
// mad = multiply add
// Expand from compressed D3DCOLOR to -1 to 1 range
mad r0.xyz, v4.zyxw, c14.x, c14.y // r0.x = normal.z * 2 - 1
// r0.y = normal.y * 2 - 1
// r0.z = normal.x * 2 - 1
// r0.w is not updated due to mask!
// dp3 = 3 element dot product
// since v4 has the normals,
// it looks like the transform in c0,c1,c2 is orthogonal since the transformed normal is not renormalized
// If a 3x3 matrix only contains rotations, its transpose = its inverse,
// so another theory is that c0,c1,c2 is only the WorldTransform (transposed) matrix since only its 3x3 part is used here
// m3x3 o6.xyz, r0, c0 (macro)
dp3 o6.x, r0, c0 // o6.x = r0.x*c0.x + r0.y*c0.y + r0.z*c0.z
dp3 o6.y, r0, c1
dp3 o6.z, r0, c2
mov r0.w, c14.z // r0.w = 1 (w of the transformed position used by the dp4s with c3..c6 and c10 below)
// transform vertex positions
mad r1, v0.xyzx, c14.zzzw, c14.wwwz // r1.x = position.x * 1 + 0
// r1.y = position.y * 1 + 0
// r1.z = position.z * 1 + 0
// r1.w = position.x * 0 + 1
// dp4 = 4 element dot product
// Here the vertex must be transformed by the transform matrix, but I am not sure how it's constructed.
// Maybe c0,c1,c2 is the WorldTransform and c3,c4,c5,c6 is ViewTransform*Projection?
// m4x3 r0.xyz, r1, c0 (macro) - three dp4 rows, so the w column of c0..c2 (translation) is applied
dp4 r0.x, r1, c0 // r0.x = r1.x*c0.x + r1.y*c0.y + r1.z*c0.z + r1.w*c0.w
dp4 r0.z, r1, c2
dp4 r0.y, r1, c1
// m4x4 o0, r0, c3 (macro)
dp4 o0.x, r0, c3 // output position = r0 dot product c3
dp4 o0.y, r0, c4
dp4 o0.z, r0, c5
dp4 o0.w, r0, c6
dp4 r0.w, r0, c10 // r0.w = r0 dot c10 (in the captured constants c10 has the same values as c6, so this would equal o0.w)
mad_sat r0.w, r0.w, c8.x, c8.y // r0.w = saturate(r0.w * c8.x + c8.y)
// transform tangents
// Expand from compressed D3DCOLOR to -1 to 1 range
mad r1.xyz, v5.zyxw, c14.x, c14.y // r1.x = v5.z * 2 - 1
// r1.y = v5.y * 2 - 1
// r1.z = v5.x * 2 - 1
// m3x3 o4.xyz, r1, c0 (macro)
dp3 o4.x, r1, c0
dp3 o4.y, r1, c1
dp3 o4.z, r1, c2
// transform blendweights
// Expand from compressed D3DCOLOR to -1 to 1 range
mad r1.xyz, v1.zyxw, c14.x, c14.y // r1.x = v1.z * 2 - 1
// r1.y = v1.y * 2 - 1
// r1.z = v1.x * 2 - 1
// m3x3 o5.xyz, r1, c0 (macro)
dp3 o5.x, r1, c0
dp3 o5.y, r1, c1
dp3 o5.z, r1, c2
// combine the c8.xy factor in r0.w with a second factor based on the transformed y coordinate
mad r1.x, r0.y, c8.z, c8.w // r1.x = r0.y * c8.z + c8.w (r0.y = y of the position transformed by c0..c2)
add r1.y, -r0.w, c14.z // r1.y = 1 - r0.w
mul_sat r1.x, r1.x, r1.y // r1.x = saturate(r1.x * r1.y)
add_sat o8.w, r0.w, -r1.x // o8.w = saturate(r0.w - r1.x)
// transform 2nd set of UV
mad o1.zw, v3.xyxy, c12.xyxy, c12 // o1.z = v3.x * c12.x + c12.z = v3.x * 0.5 + 0.5
// o1.w = v3.y * c12.y + c12.w = v3.y * 0.5 + 0.5
// transform 1st set of UV
// is adjustment by 0.5 for mapping texels to pixels?
// (the declared type is SHORT2N, so this may instead remap a signed -1..1 value to 0..1 - TODO confirm)
mad r1.xy, v2, c11, c11.zwzw // temp1.x = v2.x * c11.x + c11.z = v2.x * 0.5 + 0.5
// temp1.y = v2.y * c11.y + c11.w = v2.y * 0.5 + 0.5
mul o9.xy, r1, c13 // output texcoord9.x = temp1.x * c13.x (1 in the captured constants)
// output texcoord9.y = temp1.y * c13.y (1 in the captured constants)
mov o1.xy, r1 // output texcoord0.xy (UVs) = temp1
// calculate and normalize some vector
// c7 is either light, camera or most likely eye position
add r1.xyz, r0, -c7 // vertex world position - c7
mov o3.xyz, r0 // save vertex world position in o3
dp3 r0.x, r1, r1 // r0.x = r1.x*r1.x + r1.y*r1.y + r1.z*r1.z (sum of squares)
rsq r0.x, r0.x // r0.x = 1 / sqrt(r0.x) = 1/d
mul o2.xyz, r1, r0.x // o2.x = r1.x * 1/d
// o2.y = r1.y * 1/d
// o2.z = r1.z * 1/d (normalized each component)
// o2 has the normalized vector from c7 to the vertex
// unknown
mov o7, c14.zzww // o7 = {1, 1, 0, 0}
mov o8.xyz, c9 // o8.xyz = c9.xyz
// approximately 38 instruction slots used
Pixel shader:
// Pixel shader paired with the non-skinned vertex shader.
// Structure of the code below:
//   1. three blocks with the same shape, one per constant pair (c2,c3), (c0,c1), (c4,c5):
//      vector from the interpolated position v2 to cN.xyz, a falloff saturate(2 * max(1 - d^2 * cN.w, 0)),
//      an N.L term and an N.H term raised to the power 32 (presumably three point lights - TODO confirm)
//   2. the normal comes from texture s1 (two channels, z reconstructed) combined with v3, v4, v5
//   3. the result is (s0 color * accumulated N.L lighting + accumulated pow-32 term),
//      then blended with v6.xyz using v6.w
ps_3_0
def c10, 2, -1, 1, 0.5 // c10.xyzw = {2, -1, 1, 0.5}
def c11, 0, 32, 0, 0 // c11.x = 0 (used with max), c11.y = 32 (exponent used with pow)
dcl_texcoord v0.xy // non-scaled diffuse UVs
dcl_texcoord1 v1.xyz // normalized (position - c7) written to o2 by the vertex shader
dcl_texcoord2 v2.xyz // position transformed by c0..c2, written to o3 by the vertex shader
dcl_texcoord3 v3.xyz // transformed tangents
dcl_texcoord4 v4.xyz // transformed blendweights
dcl_texcoord5 v5.xyz // transformed normals
dcl_texcoord8 v6 // o8 from the vertex shader: xyz = vs c9, w = blend factor
dcl_texcoord9 v7.xy // scaled diffuse UVs
// 2d samplers, only two textures are used
dcl_2d s0
dcl_2d s1
// ---- first block: constants c2 (xyz = position?, w = falloff scale) and c3 ----
add r0.xyz, c2, -v2 // r0.xyz = c2.xyz - position
dp3 r0.w, r0, r0 // r0.w = r0.x*r0.x + r0.y*r0.y + r0.z*r0.z
mov r1.z, c10.z // r1.z = 1
mad r1.x, r0.w, -c2.w, r1.z // r1.x = r0.w * -c2.w + 1
rsq r0.w, r0.w // r0.w = 1/sqrt(r0.w)
max r2.x, r1.x, c11.x // r2.x = max(r1.x , 0)
add_sat r1.x, r2.x, r2.x // r1.x = saturate(2 * r2.x)
mul r1.xyw, r1.x, c3.xyzz // r1.x, r1.y, r1.w = r1.x * c3.x, c3.y, c3.z
mul r2.xyz, r1.xyww, c8 // r2.xyz = (r1.x, r1.y, r1.w) * c8.xyz
nrm r3.xyz, v1 // r3.xyz = v1 / |v1| (re-normalize after interpolation)
mad r4.xyz, r0, r0.w, -r3 // r4.xyz = normalized r0 - r3
mul r0.xyz, r0, r0.w // r0.xyz = normalized r0
nrm r5.xyz, r4 // r5.xyz = r4 / |r4| (presumably the half vector - TODO confirm)
// sample s1 with coordinate v7 and store in r4
// (texld takes no level of detail from the coordinate's w; that is texldl. v7 is declared as .xy only)
texld r4, v7, s1
mad r4.xy, r4.wyzw, c10.x, c10.y // r4.x = r4.w * 2 - 1
// r4.y = r4.y * 2 - 1
mul r6.xyz, r4.y, v4 // r6.xyz = r4.y * v4
mad r6.xyz, r4.x, v3, r6 // r6.xyz = r4.x * v3 + r4.y * v4
dp2add r0.w, r4, -r4, c10.z // r0.w = r4.x*-r4.x + r4.y*-r4.y + 1
rsq r0.w, r0.w // r0.w = 1 / sqrt(r0.w)
rcp r0.w, r0.w // r0.w = sqrt(r0.w)
mad r4.xyz, r0.w, v5, r6 // r4.xyz = r4.x * v3 + r4.y * v4 + sqrt(1 - x^2 - y^2) * v5 (not renormalized)
dp3_sat r0.w, r4, r5 // r0.w = saturate(r4 . r5)
pow_sat r2.w, r0.w, c11.y // r2.w = (r0.w)^32
mul r2.xyz, r2, r2.w
mul r5.xyz, r2, c3.w
// ---- second block: constants c0 and c1, same shape as the first ----
add r6.xyz, c0, -v2 // r6.xyz = c0.xyz - position
dp3 r0.w, r6, r6 // r0.w = squared length of r6
rsq r2.w, r0.w // r2.w = 1 / length of r6
mad r0.w, r0.w, -c0.w, r1.z // r0.w = r0.w * -c0.w + 1
max r3.w, r0.w, c11.x // r3.w = max(r0.w, 0)
add_sat r0.w, r3.w, r3.w // r0.w = saturate(2 * r3.w)
mul r7.xyz, r0.w, c1
mad r8.xyz, r6, r2.w, -r3 // r8.xyz = normalized r6 - r3
mul r6.xyz, r6, r2.w // r6.xyz = normalized r6
dp3_sat r0.w, r4, r6 // r0.w = saturate(r4 . r6)
mul r6.xyz, r7, r0.w
mul r7.xyz, r7, c8
nrm r9.xyz, r8 // r9.xyz = r8 / |r8|
dp3_sat r0.w, r4, r9 // r0.w = saturate(r4 . r9)
pow_sat r2.w, r0.w, c11.y // r2.w = (r0.w)^32
mul r7.xyz, r7, r2.w
mad r5.xyz, r7, c1.w, r5
// ---- third block: constants c4 and c5, same shape as the first ----
add r8.xyz, c4, -v2 // r8.xyz = c4.xyz - position
dp3 r0.w, r8, r8 // r0.w = squared length of r8
rsq r2.w, r0.w // r2.w = 1 / length of r8
mad r0.w, r0.w, -c4.w, r1.z // r0.w = r0.w * -c4.w + 1
max r3.w, r0.w, c11.x // r3.w = max(r0.w, 0)
add_sat r0.w, r3.w, r3.w // r0.w = saturate(2 * r3.w)
mul r9.xyz, r0.w, c5
mad r3.xyz, r8, r2.w, -r3 // r3.xyz = normalized r8 - r3 (r3 is overwritten here)
mul r8.xyz, r8, r2.w // r8.xyz = normalized r8
dp3_sat r0.w, r4, r8 // r0.w = saturate(r4 . r8)
mul r8.xyz, r9, r0.w
mul r9.xyz, r9, c8
nrm r10.xyz, r3 // r10.xyz = r3 / |r3|
dp3_sat r0.w, r4, r10 // r0.w = saturate(r4 . r10)
pow_sat r2.w, r0.w, c11.y // r2.w = r0.w^32
mul r3.xyz, r9, r2.w
mad r5.xyz, r3, c5.w, r5
// ---- combine the three pow-32 terms, weighted by cN.w and (1 - cN.w) ----
add r0.w, r1.z, -c3.w // r0.w = 1 - c3.w
mul r2.xyz, r2, r0.w
add r2.w, r1.z, -c1.w // r2.w = 1 - c1.w
mad r2.xyz, r7, r2.w, r2
add r1.z, r1.z, -c5.w // r1.z = 1 - c5.w (r1.z no longer holds 1 after this)
mad r2.xyz, r3, r1.z, r2
add r2.xyz, r2, r2 // r2.xyz = 2 * r2.xyz
mad r2.xyz, r5, c10.x, r2 // r2.x = r5.x * 2 + r2.x
// r2.y = r5.y * 2 + r2.y
// r2.z = r5.z * 2 + r2.z
// ---- combine the three N.L terms the same way ----
dp3_sat r0.x, r4, r0 // r0.x = saturate(r4 . normalized (c2 - position))
mad r0.y, r4.y, c10.w, c10.w // r0.y = r4.y * 0.5 + 0.5
mul r1.xyw, r1, r0.x
mul r0.xzw, r0.w, r1.xyyw // r0.x = r0.w * r1.x, r0.z = r0.w * r1.y, r0.w = r0.w * r1.w
mul r1.xyw, r1, c3.w
mad r1.xyw, r6.xyzz, c1.w, r1
mad r0.xzw, r6.xyyz, r2.w, r0
mad r0.xzw, r8.xyyz, r1.z, r0
mad r1.xyz, r8, c5.w, r1.xyww
add r0.xzw, r0, r0 // r0.x, r0.z, r0.w doubled
mad r0.xzw, r1.xyyz, c10.x, r0 // r0.x = r1.x * 2 + r0.x
// r0.z = r1.y * 2 + r0.z
// r0.w = r1.z * 2 + r0.w
mov r1.xyz, c7
add r1.xyz, -r1, c6 // r1.xyz = c6 - c7
mad r1.xyz, r0.y, r1, c7 // r1.xyz = c7 + r0.y * (c6 - c7), i.e. a blend from c7 to c6 driven by r4.y
add r0.xyz, r0.xzww, r1 // r0.xyz = (r0.x, r0.z, r0.w) + r1.xyz
mov r0.w, c8.w
mad r0.xyz, c9, r0.w, r0 // r0.xyz = c9.xyz * c8.w + r0.xyz
// sample s0 with coordinate v0 and store in r1
texld r1, v0, s0
mad r0.xyz, r1, r0, r2 // r0.xyz = s0 color * r0.xyz + r2.xyz
mov oC0.w, r1.w // output alpha = s0 alpha
add r0.xyz, r0, -v6
// this sets the final output color
mad oC0.xyz, v6.w, r0, v6 // oC0.xyz = v6.xyz + v6.w * (color - v6.xyz)
// approximately 104 instruction slots used (2 texture, 102 arithmetic)
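Models that have skeletons (skinned models) presumably use the following vertex buffer layout and shaders:
Stream | Offset | Type | Method | Usage | UsageIndex |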
---|---|---|---|---|---|
0 | 0 | FLOAT3 | DEFAULT | POSITION | |
0 | 12 | D3DCOLOR | DEFAULT | COLOR | |
0 | 16 | D3DCOLOR | DEFAULT | COLOR | |
0 | 20 | SHORT2N | DEFAULT | TEXCOORD | |
0 | 24 | SHORT2N | DEFAULT | TEXCOORD | |
0 | 28 | BLENDWEIGHT | DEFAULT | BLENDWEIGHT | |
2 | 0 | BLENDWEIGHT | DEFAULT | BLENDWEIGHT | |
2 | 4 | BLENDWEIGHT | DEFAULT | BLENDWEIGHT | |
D3DDECL_END |
// Vertex shader that blends up to four sets of constant registers per vertex
// (indexed through a0 from the blend indices) before applying the same
// c0..c2 / c3..c6 transforms as the non-indexed vertex shader.
// Presumably this is the skinned-model variant - TODO confirm.
// Differences from the non-indexed shader visible in the code:
//   - normal, tangent and blendweight1 vectors and the position go through the blended rows first
//   - the UV constants are c239/c240/c241 instead of c11/c12/c13
//   - o8 additionally depends on c236..c238 (a rotated box distance, see below)
vs_3_0
def c242, 2, -1, 765.005859, -0 // -c242.y is used as 1, -c242.w as 0; 765 is approximately 255 * 3
def c243, -0.159154937, 0.5, 6.28318548, -3.14159274 // {-1/(2*pi), 0.5, 2*pi, -pi}, used to wrap the sincos argument
dcl_position v0
dcl_blendweight v1
dcl_blendweight1 v2
dcl_blendindices v3
dcl_texcoord v4
dcl_texcoord1 v5
dcl_color v6
dcl_color1 v7
dcl_position o0
dcl_texcoord o1
dcl_texcoord1 o2.xyz
dcl_texcoord2 o3.xyz
dcl_texcoord3 o4.xyz
dcl_texcoord4 o5.xyz
dcl_texcoord5 o6.xyz
dcl_texcoord6 o7
dcl_texcoord8 o8
dcl_texcoord9 o9.xy
mad r0.xyz, v6.zyxw, c242.x, c242.y // r0.xyz = v6.zyx * 2 - 1
// build the register offsets: a0 = round(765.005859 * v3.zyxw)
// (if v3 holds byte/255 values this is 3 * byte index, i.e. three registers per entry - TODO confirm)
mul r1, c242.z, v3.zyxw
mova a0, r1
// r1 = weighted sum of c[11 + offset]; each weight is paired with the index from the same
// v3 component (a0.x comes from v3.z, a0.z from v3.x)
mul r1, v1.y, c11[a0.y]
mad r1, v1.z, c11[a0.x], r1
mad r1, v1.x, c11[a0.z], r1
mad r1, v1.w, c11[a0.w], r1
dp3 r2.x, r0, r1
// r3 = the same weighted sum over c[12 + offset]
mul r3, v1.y, c12[a0.y]
mad r3, v1.z, c12[a0.x], r3
mad r3, v1.x, c12[a0.z], r3
mad r3, v1.w, c12[a0.w], r3
dp3 r2.y, r0, r3
// r4 = the same weighted sum over c[13 + offset]
mul r4, v1.y, c13[a0.y]
mad r4, v1.z, c13[a0.x], r4
mad r4, v1.x, c13[a0.z], r4
mad r4, v1.w, c13[a0.w], r4
dp3 r2.z, r0, r4
nrm r0.xyz, r2 // renormalize the blended vector (only done for v6, not for v7 / v2 below)
dp3 o6.x, r0, c0
dp3 o6.y, r0, c1
dp3 o6.z, r0, c2
mad r0, v0.xyzx, -c242.yyyw, -c242.wwwy // r0 = (position.x, position.y, position.z, 1)
// position through the blended rows r1, r3, r4 (dp4, so their w components are applied)
dp4 r2.x, r0, r1
dp4 r2.y, r0, r3
dp4 r2.z, r0, r4
mov r2.w, -c242.y // r2.w = 1
// then through c0..c2
dp4 r0.x, r2, c0
dp4 r0.z, r2, c2
dp4 r0.y, r2, c1
mov r0.w, -c242.y // r0.w = 1
// then through c3..c6 to get the output position
dp4 o0.x, r0, c3
dp4 o0.y, r0, c4
dp4 o0.z, r0, c5
dp4 o0.w, r0, c6
dp4 r0.w, r0, c10
mad_sat r0.w, r0.w, c8.x, c8.y // r0.w = saturate(r0.w * c8.x + c8.y)
// v7 (color1): expand to -1..1, blended rows, then c0..c2
mad r2.xyz, v7.zyxw, c242.x, c242.y
dp3 r5.x, r2, r1
dp3 r5.y, r2, r3
dp3 r5.z, r2, r4
dp3 o4.x, r5, c0
dp3 o4.y, r5, c1
dp3 o4.z, r5, c2
// v2 (blendweight1): expand to -1..1, blended rows, then c0..c2
mad r2.xyz, v2.zyxw, c242.x, c242.y
dp3 r1.x, r2, r1 // overwrites r1.x; the blended row in r1 is not needed after this instruction
dp3 r1.y, r2, r3
dp3 r1.z, r2, r4
dp3 o5.x, r1, c0
dp3 o5.y, r1, c1
dp3 o5.z, r1, c2
// ---- c236..c238: the arithmetic below rotates (x, z) of the transformed position relative to
// c236.xy by an angle derived from c237.z and takes the smallest distance to six bounds
// (looks like an oriented box test - TODO confirm) ----
add r1.xyz, r0.xzzw, -c236.xyyw // r1.x = r0.x - c236.x, r1.y = r1.z = r0.z - c236.y
mov r2.z, c237.z
mad r1.w, r2.z, c243.x, c243.y // r1.w = -c237.z / (2*pi) + 0.5
frc r1.w, r1.w
mad r1.w, r1.w, c243.z, c243.w // r1.w = frac * 2*pi - pi, i.e. -c237.z wrapped into [-pi, pi)
sincos r2.xy, r1.w // r2.x = cos(r1.w), r2.y = sin(r1.w)
mul r1.yzw, r1.xxyz, r2.xxyx // r1.y = dx * cos, r1.z = dz * sin, r1.w = dz * cos
mad r2.z, r1.x, -r2.y, r1.w // r2.z = -dx * sin + dz * cos
add r2.x, r1.z, r1.y // r2.x = dx * cos + dz * sin
add r1.xz, -r2, c236.zyww // r1.x = c236.z - r2.x, r1.z = c236.w - r2.z
add r2.y, r0.y, -c237.x // r2.y = r0.y - c237.x
add r1.y, -r2.y, c237.y // r1.y = c237.y - r2.y
min r1.xyz, r2, r1 // per axis: the smaller of the two distances
min r1.y, r1.z, r1.y
min r1.x, r1.x, r1.y // r1.x = smallest of the three
add r1.x, r1.x, -c237.w
mul r1.x, r1.x, c238.w // r1.x = (smallest distance - c237.w) * c238.w
// ---- same c8-based factor as in the non-indexed vertex shader, kept in r0.w ----
mad r1.y, r0.y, c8.z, c8.w
add r1.z, -r0.w, -c242.y // r1.z = 1 - r0.w
mul_sat r1.y, r1.y, r1.z
add_sat r0.w, r0.w, -r1.y // r0.w = saturate(r0.w - r1.y)
// ---- merge that factor with the box term ----
add r1.y, -r0.w, -c242.y // r1.y = 1 - r0.w
mul_sat r1.y, -r1.x, r1.y // r1.y = saturate(-r1.x * (1 - r0.w))
max r1.x, -r1.x, -c242.w // r1.x = max(-r1.x, 0)
min o8.w, r0.w, r1.x // o8.w = min(r0.w, r1.x)
add r1.x, -r1.y, -c242.y // r1.x = 1 - r1.y
mov r2.xyz, c9
add r1.yzw, -r2.xxyz, c238.xxyz // r1.yzw = c238.xyz - c9.xyz
mul r1.xyz, r1.x, r1.yzww // r1.xyz = r1.x * (c238.xyz - c9.xyz)
mad o8.xyz, r0.w, r1, c9 // o8.xyz = c9.xyz + r0.w * r1.xyz
// ---- UVs: same shape as the non-indexed shader with c240 / c239 / c241 in place of c12 / c11 / c13 ----
mad o1.zw, v5.xyxy, c240.xyxy, c240 // o1.z = v5.x * c240.x + c240.z, o1.w = v5.y * c240.y + c240.w
mad r1.xy, v4, c239, c239.zwzw // r1.x = v4.x * c239.x + c239.z, r1.y = v4.y * c239.y + c239.w
mul o9.xy, r1, c241
mov o1.xy, r1
// ---- normalized (transformed position - c7) ----
add r1.xyz, r0, -c7
mov o3.xyz, r0 // o3.xyz = position after the blended rows and c0..c2
dp3 r0.x, r1, r1
rsq r0.x, r0.x
mul o2.xyz, r1, r0.x
mov o7, -c242.yyww // o7 = {1, 1, 0, 0}
Pixel shader
// Pixel shader listed after the indexed (presumably skinned) vertex shader.
// It has the same overall shape as the first pixel shader in these notes
// (three blocks driven by (c2,c3), (c0,c1), (c4,c5), a normal decoded from s1, pow-32 terms), but:
//   - a third texture s2 is sampled with v0; its rgb multiplies the pow-32 terms and its alpha
//     scales the c8 term, where the first pixel shader used constants
//     (presumably a per-pixel specular map - TODO confirm)
//   - the literal constants are defined in c9/c10 instead of c10/c11
ps_3_0
def c9, 2, -1, 1, 0.5 // c9.xyzw = {2, -1, 1, 0.5}
def c10, 0, 32, 0, 0 // c10.x = 0 (used with max), c10.y = 32 (exponent used with pow)
dcl_texcoord v0.xy
dcl_texcoord1 v1.xyz
dcl_texcoord2 v2.xyz
dcl_texcoord3 v3.xyz
dcl_texcoord4 v4.xyz
dcl_texcoord5 v5.xyz
dcl_texcoord8 v6
dcl_texcoord9 v7.xy
dcl_2d s0
dcl_2d s1
dcl_2d s2
// ---- decode the normal from s1 (x in alpha, y in green) and combine with v3, v4, v5 ----
texld r0, v7, s1
mad r0.xy, r0.wyzw, c9.x, c9.y // r0.x = r0.w * 2 - 1, r0.y = r0.y * 2 - 1
mul r1.xyz, r0.y, v4
mad r1.xyz, r0.x, v3, r1 // r1.xyz = r0.x * v3 + r0.y * v4
dp2add r0.x, r0, -r0, c9.z // r0.x = 1 - r0.x^2 - r0.y^2
rsq r0.x, r0.x
rcp r0.x, r0.x // rsq followed by rcp = sqrt
mad r0.xyz, r0.x, v5, r1 // r0.xyz = r0.x * v3 + r0.y * v4 + sqrt(...) * v5 (not renormalized)
// ---- first block: constants c2 and c3 ----
add r1.xyz, c2, -v2 // r1.xyz = c2.xyz - v2
dp3 r0.w, r1, r1 // r0.w = squared length of r1
rsq r1.w, r0.w // r1.w = 1 / length of r1
mov r2.z, c9.z // r2.z = 1
mad r0.w, r0.w, -c2.w, r2.z // r0.w = 1 - squared length * c2.w
max r2.x, r0.w, c10.x // r2.x = max(r0.w, 0)
add_sat r0.w, r2.x, r2.x // r0.w = saturate(2 * r2.x)
mul r2.xyw, r0.w, c3.xyzz // r2.x, r2.y, r2.w = r0.w * c3.x, c3.y, c3.z
nrm r3.xyz, v1 // r3.xyz = v1 / |v1|
mad r4.xyz, r1, r1.w, -r3 // r4.xyz = normalized r1 - r3
mul r1.xyz, r1, r1.w // r1.xyz = normalized r1
dp3_sat r0.w, r0, r1 // r0.w = saturate(r0 . r1)
mul r1.xyz, r2.xyww, r0.w
nrm r5.xyz, r4 // r5.xyz = r4 / |r4|
dp3_sat r0.w, r0, r5 // r0.w = saturate(r0 . r5)
pow_sat r1.w, r0.w, c10.y // r1.w = (r0.w)^32
// sample s2 with coordinate v0 and store in r4
texld r4, v0, s2
mul r2.xyw, r2, r4.xyzz // r2.x, r2.y, r2.w scaled by r4.x, r4.y, r4.z
mul r2.xyw, r1.w, r2
mul r5.xyz, r2.xyww, c3.w
// ---- second block: constants c0 and c1 ----
add r6.xyz, c0, -v2 // r6.xyz = c0.xyz - v2
dp3 r0.w, r6, r6
rsq r1.w, r0.w
mad r0.w, r0.w, -c0.w, r2.z // r0.w = 1 - squared length * c0.w
max r3.w, r0.w, c10.x // r3.w = max(r0.w, 0)
add_sat r0.w, r3.w, r3.w // r0.w = saturate(2 * r3.w)
mul r7.xyz, r0.w, c1
mad r8.xyz, r6, r1.w, -r3 // r8.xyz = normalized r6 - r3
mul r6.xyz, r6, r1.w // r6.xyz = normalized r6
dp3_sat r0.w, r0, r6 // r0.w = saturate(r0 . r6)
mul r6.xyz, r7, r0.w
mul r7.xyz, r4, r7 // scaled by the s2 sample
nrm r9.xyz, r8 // r9.xyz = r8 / |r8|
dp3_sat r0.w, r0, r9 // r0.w = saturate(r0 . r9)
pow_sat r1.w, r0.w, c10.y // r1.w = (r0.w)^32
mul r7.xyz, r7, r1.w
mad r5.xyz, r7, c1.w, r5
// ---- third block: constants c4 and c5 ----
add r8.xyz, c4, -v2 // r8.xyz = c4.xyz - v2
dp3 r0.w, r8, r8
rsq r1.w, r0.w
mad r0.w, r0.w, -c4.w, r2.z // r0.w = 1 - squared length * c4.w
max r3.w, r0.w, c10.x // r3.w = max(r0.w, 0)
add_sat r0.w, r3.w, r3.w // r0.w = saturate(2 * r3.w)
mul r9.xyz, r0.w, c5
mad r3.xyz, r8, r1.w, -r3 // r3.xyz = normalized r8 - r3 (r3 is overwritten here)
mul r8.xyz, r8, r1.w // r8.xyz = normalized r8
dp3_sat r0.w, r0, r8 // r0.w = saturate(r0 . r8)
mul r8.xyz, r9, r0.w
mul r4.xyz, r4, r9 // scaled by the s2 sample; r4.w (s2 alpha) is preserved
nrm r9.xyz, r3 // r9.xyz = r3 / |r3|
dp3_sat r0.x, r0, r9 // r0.x = saturate(r0 . r9); the normal's x is overwritten, y and z are still intact
mad r0.y, r0.y, c9.w, c9.w // r0.y = normal.y * 0.5 + 0.5
pow_sat r1.w, r0.x, c10.y // r1.w = (r0.x)^32
mul r0.xzw, r4.xyyz, r1.w // r0.x = r4.x * r1.w, r0.z = r4.y * r1.w, r0.w = r4.z * r1.w
mad r3.xyz, r0.xzww, c5.w, r5
// ---- combine the terms, weighted by cN.w and (1 - cN.w) ----
add r1.w, r2.z, -c3.w // r1.w = 1 - c3.w
mul r2.xyw, r2, r1.w
mul r4.xyz, r1, r1.w
mul r1.xyz, r1, c3.w
mad r1.xyz, r6, c1.w, r1
mad r1.xyz, r8, c5.w, r1
add r1.w, r2.z, -c1.w // r1.w = 1 - c1.w
mad r2.xyw, r7.xyzz, r1.w, r2
mad r4.xyz, r6, r1.w, r4
add r1.w, r2.z, -c5.w // r1.w = 1 - c5.w
mad r0.xzw, r0, r1.w, r2.xyyw // r0.x = r0.x * r1.w + r2.x, r0.z = r0.z * r1.w + r2.y, r0.w = r0.w * r1.w + r2.w
mad r2.xyz, r8, r1.w, r4
add r2.xyz, r2, r2 // r2.xyz = 2 * r2.xyz
mad r1.xyz, r1, c9.x, r2 // r1.xyz = r1.xyz * 2 + r2.xyz
add r0.xzw, r0, r0 // r0.x, r0.z, r0.w doubled
mad r0.xzw, r3.xyyz, c9.x, r0 // r0.x = r3.x * 2 + r0.x, r0.z = r3.y * 2 + r0.z, r0.w = r3.z * 2 + r0.w
mov r2.xyz, c7
add r2.xyz, -r2, c6 // r2.xyz = c6 - c7
mad r2.xyz, r0.y, r2, c7 // r2.xyz = c7 + r0.y * (c6 - c7)
add r1.xyz, r1, r2
mad r1.xyz, c8, r4.w, r1 // r1.xyz = c8.xyz * s2 alpha + r1.xyz
// sample s0 with coordinate v0 and store in r2
texld r2, v0, s0
mad r0.xyz, r2, r1, r0.xzww // r0.xyz = s0 color * r1.xyz + (r0.x, r0.z, r0.w)
mov oC0.w, r2.w // output alpha = s0 alpha
add r0.xyz, r0, -v6
mad oC0.xyz, v6.w, r0, v6 // oC0.xyz = v6.xyz + v6.w * (color - v6.xyz)
// approximately 104 instruction slots used (3 texture, 101 arithmetic)