/
sm_30_sass_vmin_dp4a_kernels.txt
135 lines (124 loc) · 8.97 KB
/
sm_30_sass_vmin_dp4a_kernels.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
Fatbin elf code:
================
arch = sm_30
code version = [1,7]
producer = <unknown>
host = linux
compile_size = 64bit
code for sm_30
/*
 * loop_unroll -- the mangled name demangles (Itanium C++ ABI) to
 *   loop_unroll(int&, signed char const*, signed char const*, int const&)
 *
 * What the listing does: loads one 32-bit word through the 4th argument,
 * stores it through the 1st argument, then performs four multiply-add
 * steps over sign-extended bytes at offsets 0..3 of the 2nd and 3rd
 * arguments, storing the running sum through the 1st argument after
 * every step.
 *
 * Argument locations as read by this code (constant bank 0, 8 bytes each;
 * presumably the kernel parameter area in declaration order -- confirm):
 *   c[0x0][0x140..0x144] -> R2:R3   1st argument (store target)
 *   c[0x0][0x148..0x14c] -> R6:R7   2nd argument (byte source)
 *   c[0x0][0x150..0x154] -> R8:R9   3rd argument (byte source)
 *   c[0x0][0x158..0x15c] -> R4:R5   4th argument (initial addend)
 *
 * Lines that carry only a hex word (no address, no mnemonic) fill the
 * address slots the listing skips (0x0000, 0x0040, 0x0080, 0x00c0);
 * presumably these are the per-group scheduling control words of this
 * architecture -- confirm against the toolchain documentation.
 *
 * NOTE(review): the store after every step is presumably forced because
 * the compiler cannot rule out aliasing between the store target and the
 * byte sources -- confirm against the kernel source.
 */
Function : _Z11loop_unrollRiPKaS1_RKi
.headerflags @"EF_CUDA_SM30 EF_CUDA_PTX_SM(EF_CUDA_SM30)"
/* 0x2200420042804307 */
/*0008*/ MOV R1, c[0x0][0x44]; /* 0x2800400110005de4 */ /* R1 is not read again in this function; presumably ABI stack-pointer setup -- confirm */
/*0010*/ MOV R4, c[0x0][0x158]; /* 0x2800400560011de4 */ /* R4:R5 = 4th argument (low word) */
/*0018*/ MOV R5, c[0x0][0x15c]; /* 0x2800400570015de4 */ /* R4:R5 = 4th argument (high word) */
/*0020*/ LD.E R4, [R4]; /* 0x8400000000411c85 */ /* R4 = 32-bit word at [R4:R5]; the address pair is consumed here */
/*0028*/ MOV R2, c[0x0][0x140]; /* 0x2800400500009de4 */ /* R2:R3 = 1st argument (low word) */
/*0030*/ MOV R3, c[0x0][0x144]; /* 0x280040051000dde4 */ /* R2:R3 = 1st argument (high word) */
/*0038*/ MOV R6, c[0x0][0x148]; /* 0x2800400520019de4 */ /* R6:R7 = 2nd argument (low word) */
/* 0x2283f232e3f04207 */
/*0048*/ MOV R7, c[0x0][0x14c]; /* 0x280040053001dde4 */ /* R6:R7 = 2nd argument (high word) */
/*0050*/ MOV R8, c[0x0][0x150]; /* 0x2800400540021de4 */ /* R8:R9 = 3rd argument (low word) */
/*0058*/ MOV R9, c[0x0][0x154]; /* 0x2800400550025de4 */ /* R8:R9 = 3rd argument (high word) */
/*0060*/ ST.E [R2], R4; /* 0x9400000000211c85 */ /* store the initial addend through the 1st argument */
/*0068*/ LD.E.S8 R0, [R6]; /* 0x8400000000601c25 */ /* step 0: signed byte at offset 0 of the 2nd argument */
/*0070*/ LD.E.S8 R11, [R8]; /* 0x840000000082dc25 */ /* step 0: signed byte at offset 0 of the 3rd argument */
/*0078*/ IMAD R11, R0, R11, R4; /* 0x200800002c02dca3 */ /* R11 = R0 * R11 + R4 */
/* 0x23f232e283f232e7 */
/*0088*/ ST.E [R2], R11; /* 0x940000000022dc85 */ /* store running sum after step 0 */
/*0090*/ LD.E.S8 R0, [R6+0x1]; /* 0x8400000004601c25 */ /* step 1: signed byte at offset 1 of the 2nd argument */
/*0098*/ LD.E.S8 R12, [R8+0x1]; /* 0x8400000004831c25 */ /* step 1: signed byte at offset 1 of the 3rd argument */
/*00a0*/ IMAD R12, R0, R12, R11; /* 0x2016000030031ca3 */ /* R12 = R0 * R12 + R11 */
/*00a8*/ ST.E [R2], R12; /* 0x9400000000231c85 */ /* store running sum after step 1 */
/*00b0*/ LD.E.S8 R0, [R6+0x2]; /* 0x8400000008601c25 */ /* step 2: signed byte at offset 2 of the 2nd argument */
/*00b8*/ LD.E.S8 R5, [R8+0x2]; /* 0x8400000008815c25 */ /* step 2: signed byte at offset 2 of the 3rd argument */
/* 0x22f04283f232e287 */
/*00c8*/ IMAD R5, R0, R5, R12; /* 0x2018000014015ca3 */ /* R5 = R0 * R5 + R12 */
/*00d0*/ ST.E [R2], R5; /* 0x9400000000215c85 */ /* store running sum after step 2 */
/*00d8*/ LD.E.S8 R0, [R6+0x3]; /* 0x840000000c601c25 */ /* step 3: signed byte at offset 3 of the 2nd argument */
/*00e0*/ LD.E.S8 R4, [R8+0x3]; /* 0x840000000c811c25 */ /* step 3: signed byte at offset 3 of the 3rd argument */
/*00e8*/ IMAD R4, R0, R4, R5; /* 0x200a000010011ca3 */ /* R4 = R0 * R4 + R5 */
/*00f0*/ ST.E [R2], R4; /* 0x9400000000211c85 */ /* final result stored through the 1st argument */
/*00f8*/ EXIT; /* 0x8000000000001de7 */
/*0100*/ BRA 0x100; /* 0x4003ffffe0001de7 */ /* branch to its own address, placed after EXIT; presumably a guard/padding instruction */
/*0108*/ NOP; /* 0x4000000000001de4 */ /* NOPs from here to the end: presumably alignment padding */
/*0110*/ NOP; /* 0x4000000000001de4 */
/*0118*/ NOP; /* 0x4000000000001de4 */
/*0120*/ NOP; /* 0x4000000000001de4 */
/*0128*/ NOP; /* 0x4000000000001de4 */
/*0130*/ NOP; /* 0x4000000000001de4 */
/*0138*/ NOP; /* 0x4000000000001de4 */
.....................................
/*
 * dp4a -- the mangled name demangles (Itanium C++ ABI) to
 *   dp4a(int&, unsigned int const&, unsigned int const&, int const&)
 *
 * In this sm_30 build the function has no effective body: it sets R1 and
 * exits without reading any argument or touching memory.
 *
 * NOTE(review): presumably the real work in the source is compiled only
 * for architectures that provide a dp4a instruction (e.g. behind a
 * __CUDA_ARCH__ check), which would leave nothing to emit for sm_30 --
 * confirm against the kernel source. Callers targeting sm_30 should not
 * expect the output argument to be written.
 */
Function : _Z4dp4aRiRKjS1_RKi
.headerflags @"EF_CUDA_SM30 EF_CUDA_PTX_SM(EF_CUDA_SM30)"
/* 0x200000000002f307 */
/*0008*/ MOV R1, c[0x0][0x44]; /* 0x2800400110005de4 */ /* R1 is not read again in this function; presumably ABI stack-pointer setup -- confirm */
/*0010*/ EXIT; /* 0x8000000000001de7 */ /* returns immediately: no loads, no stores */
/*0018*/ BRA 0x18; /* 0x4003ffffe0001de7 */ /* branch to its own address, placed after EXIT; presumably a guard/padding instruction */
/*0020*/ NOP; /* 0x4000000000001de4 */ /* NOPs from here to the end: presumably alignment padding */
/*0028*/ NOP; /* 0x4000000000001de4 */
/*0030*/ NOP; /* 0x4000000000001de4 */
/*0038*/ NOP; /* 0x4000000000001de4 */
.............................
/*
 * vmin4 -- the mangled name demangles (Itanium C++ ABI) to
 *   vmin4(int&, unsigned int const&, unsigned int const&, int const&)
 *
 * What the listing does: loads one 32-bit word through each of the 2nd,
 * 3rd and 4th arguments, feeds them to a single VMNMX4.ACC instruction,
 * and stores the result through the 1st argument.
 *
 * Argument locations as read by this code (constant bank 0, 8 bytes each;
 * presumably the kernel parameter area in declaration order -- confirm):
 *   c[0x0][0x140..0x144] -> R8:R9   1st argument (store target)
 *   c[0x0][0x148..0x14c] -> R2:R3   2nd argument
 *   c[0x0][0x150..0x154] -> R4:R5   3rd argument
 *   c[0x0][0x158..0x15c] -> R6:R7   4th argument
 *
 * Lines that carry only a hex word (no address, no mnemonic) fill the
 * address slots the listing skips (0x0000, 0x0040, 0x0080); presumably
 * these are per-group scheduling control words -- confirm.
 */
Function : _Z5vmin4RiRKjS1_RKi
.headerflags @"EF_CUDA_SM30 EF_CUDA_PTX_SM(EF_CUDA_SM30)"
/* 0x2240422042004307 */
/*0008*/ MOV R1, c[0x0][0x44]; /* 0x2800400110005de4 */ /* R1 is not read again in this function; presumably ABI stack-pointer setup -- confirm */
/*0010*/ MOV R2, c[0x0][0x148]; /* 0x2800400520009de4 */ /* R2:R3 = 2nd argument (low word) */
/*0018*/ MOV R3, c[0x0][0x14c]; /* 0x280040053000dde4 */ /* R2:R3 = 2nd argument (high word) */
/*0020*/ MOV R4, c[0x0][0x150]; /* 0x2800400540011de4 */ /* R4:R5 = 3rd argument (low word) */
/*0028*/ MOV R5, c[0x0][0x154]; /* 0x2800400550015de4 */ /* R4:R5 = 3rd argument (high word) */
/*0030*/ MOV R6, c[0x0][0x158]; /* 0x2800400560019de4 */ /* R6:R7 = 4th argument (low word) */
/*0038*/ MOV R7, c[0x0][0x15c]; /* 0x280040057001dde4 */ /* R6:R7 = 4th argument (high word) */
/* 0x220283f230423047 */
/*0048*/ LD.E R2, [R2]; /* 0x8400000000209c85 */ /* R2 = 32-bit word behind the 2nd argument */
/*0050*/ MOV R8, c[0x0][0x140]; /* 0x2800400500021de4 */ /* R8:R9 = 1st argument (low word) */
/*0058*/ LD.E R4, [R4]; /* 0x8400000000411c85 */ /* R4 = 32-bit word behind the 3rd argument */
/*0060*/ MOV R9, c[0x0][0x144]; /* 0x2800400510025de4 */ /* R8:R9 = 1st argument (high word) */
/*0068*/ LD.E R7, [R6]; /* 0x840000000061dc85 */ /* R7 = 32-bit word behind the 4th argument; overwrites the high half of the address pair, which is consumed by this load */
/*0070*/ VMNMX4.ACC R11, R2, R4, R7; /* 0x878ec44c1022dc64 */ /* 4-lane video min/max of R2 and R4 with accumulation involving R7; min-vs-max and lane signedness are not visible in the mnemonic (the function name suggests min) -- confirm from the encoding */
/*0078*/ ST.E [R8], R11; /* 0x940000000082dc85 */ /* result stored through the 1st argument */
/* 0x20000000000002f7 */
/*0088*/ EXIT; /* 0x8000000000001de7 */
/*0090*/ BRA 0x90; /* 0x4003ffffe0001de7 */ /* branch to its own address, placed after EXIT; presumably a guard/padding instruction */
/*0098*/ NOP; /* 0x4000000000001de4 */ /* NOPs from here to the end: presumably alignment padding */
/*00a0*/ NOP; /* 0x4000000000001de4 */
/*00a8*/ NOP; /* 0x4000000000001de4 */
/*00b0*/ NOP; /* 0x4000000000001de4 */
/*00b8*/ NOP; /* 0x4000000000001de4 */
..............................
/*
 * vmin4_intrinsic -- the mangled name demangles (Itanium C++ ABI) to
 *   vmin4_intrinsic(int&, unsigned int const&, unsigned int const&, int const&)
 *
 * What the listing does: loads one 32-bit word through each of the 2nd
 * and 3rd arguments, applies VMNMX4 (no accumulate, third source RZ),
 * then sums the four 8-bit fields of that result together with the word
 * currently stored behind the 1st argument, and writes the total back
 * through the 1st argument.
 *
 * Argument locations as read by this code (constant bank 0, 8 bytes each;
 * presumably the kernel parameter area in declaration order -- confirm):
 *   c[0x0][0x140..0x144] -> R6:R7   1st argument (read, then store target)
 *   c[0x0][0x148..0x14c] -> R2:R3   2nd argument
 *   c[0x0][0x150..0x154] -> R4:R5   3rd argument
 *
 * NOTE(review): unlike the vmin4 function in this same dump, this code
 * never reads c[0x0][0x158..0x15c] (the 4th argument); the addend is
 * taken from the 1st argument instead. Confirm against the kernel source
 * whether that difference is intended.
 *
 * Lines that carry only a hex word (no address, no mnemonic) fill the
 * address slots the listing skips (0x0000, 0x0040, 0x0080); presumably
 * these are per-group scheduling control words -- confirm.
 */
Function : _Z15vmin4_intrinsicRiRKjS1_RKi
.headerflags @"EF_CUDA_SM30 EF_CUDA_PTX_SM(EF_CUDA_SM30)"
/* 0x2240422042004307 */
/*0008*/ MOV R1, c[0x0][0x44]; /* 0x2800400110005de4 */ /* R1 is not read again in this function; presumably ABI stack-pointer setup -- confirm */
/*0010*/ MOV R2, c[0x0][0x148]; /* 0x2800400520009de4 */ /* R2:R3 = 2nd argument (low word) */
/*0018*/ MOV R3, c[0x0][0x14c]; /* 0x280040053000dde4 */ /* R2:R3 = 2nd argument (high word) */
/*0020*/ MOV R4, c[0x0][0x150]; /* 0x2800400540011de4 */ /* R4:R5 = 3rd argument (low word) */
/*0028*/ MOV R5, c[0x0][0x154]; /* 0x2800400550015de4 */ /* R4:R5 = 3rd argument (high word) */
/*0030*/ MOV R6, c[0x0][0x140]; /* 0x2800400500019de4 */ /* R6:R7 = 1st argument (low word) */
/*0038*/ MOV R7, c[0x0][0x144]; /* 0x280040051001dde4 */ /* R6:R7 = 1st argument (high word) */
/* 0x2232220283f23237 */
/*0048*/ LD.E R2, [R2]; /* 0x8400000000209c85 */ /* R2 = 32-bit word behind the 2nd argument */
/*0050*/ LD.E R4, [R4]; /* 0x8400000000411c85 */ /* R4 = 32-bit word behind the 3rd argument */
/*0058*/ LD.E R11, [R6]; /* 0x840000000062dc85 */ /* R11 = word currently behind the 1st argument (used as the addend below) */
/*0060*/ VMNMX4 R0, R2, R4, RZ; /* 0x87fe844c10201c64 */ /* 4-lane video min/max of R2 and R4, third source is RZ; min-vs-max and lane signedness are not visible in the mnemonic (the function name suggests min) -- confirm from the encoding */
/*0068*/ BFE R9, R0, 0x808; /* 0x7000c02020025c23 */ /* bit-field extract; immediate 0x808 presumably means start bit 8, width 8 (second byte lane) -- confirm field layout and signedness */
/*0070*/ IMAD.HI R10, R0, 0x100, R11; /* 0x2016c00400029ce3 */ /* R10 = high 32 bits of (R0 * 0x100) + R11, i.e. the top byte lane of R0 plus the addend; signedness not visible in the mnemonic -- confirm */
/*0078*/ BFE R8, R0, 0x800; /* 0x7000c02000021c23 */ /* presumably start bit 0, width 8 (lowest byte lane) */
/* 0x2002f04282828217 */
/*0088*/ BFE R0, R0, 0x810; /* 0x7000c02040001c23 */ /* presumably start bit 16, width 8 (third byte lane); overwrites R0, which is no longer needed */
/*0090*/ IADD R9, R9, R10; /* 0x4800000028925c03 */ /* R9 = second lane + (top lane + addend) */
/*0098*/ IADD R9, R8, R9; /* 0x4800000024825c03 */ /* R9 += lowest lane */
/*00a0*/ IADD R0, R0, R9; /* 0x4800000024001c03 */ /* R0 = third lane + R9: all four lanes plus the addend */
/*00a8*/ ST.E [R6], R0; /* 0x9400000000601c85 */ /* total stored through the 1st argument */
/*00b0*/ EXIT; /* 0x8000000000001de7 */
/*00b8*/ BRA 0xb8; /* 0x4003ffffe0001de7 */ /* branch to its own address, placed after EXIT; presumably a guard/padding instruction */
.........................................
Fatbin ptx code:
================
arch = sm_30
code version = [6,5]
producer = <unknown>
host = linux
compile_size = 64bit
compressed