Skip to content

Commit

Permalink
Update assembly code for lsh256 and lsh512 implementations
Browse files Browse the repository at this point in the history
  • Loading branch information
RyuaNerin committed Jun 19, 2024
1 parent 0297c3c commit 5adc6be
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 61 deletions.
6 changes: 3 additions & 3 deletions lsh256/asm_amd64_avx2.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include "textflag.h"

TEXT ·__lsh256_avx2_init(SB), NOSPLIT, $16
TEXT ·__lsh256_avx2_init(SB), NOSPLIT, $0-16
MOVQ ctx+0(FP), DI
MOVQ algtype+8(FP), SI

Expand Down Expand Up @@ -54,7 +54,7 @@ DATA LCDATA2<>+0x070(SB)/8, $0x0000000400000007
DATA LCDATA2<>+0x078(SB)/8, $0x0000000600000005
GLOBL LCDATA2<>(SB), RODATA|NOPTR, $128

TEXT ·__lsh256_avx2_update(SB), NOSPLIT, $32
TEXT ·__lsh256_avx2_update(SB), NOSPLIT, $0-32
MOVQ ctx+0(FP), DI
MOVQ data_base+8(FP), SI
MOVQ data_len+16(FP), DX
Expand Down Expand Up @@ -543,7 +543,7 @@ LBB1_34:
JE LBB1_41
JMP LBB1_36

TEXT ·__lsh256_avx2_final(SB), NOSPLIT, $16
TEXT ·__lsh256_avx2_final(SB), NOSPLIT, $0-16
MOVQ ctx+0(FP), DI
MOVQ hashval+8(FP), SI

Expand Down
21 changes: 5 additions & 16 deletions lsh256/asm_amd64_sse2.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include "textflag.h"

TEXT ·__lsh256_sse2_init(SB), NOSPLIT, $16
TEXT ·__lsh256_sse2_init(SB), NOSPLIT, $0-16
MOVQ ctx+0(FP), DI
MOVQ algtype+8(FP), SI

Expand Down Expand Up @@ -56,17 +56,13 @@ DATA LCDATA2<>+0x070(SB)/8, $0x0000000000000000
DATA LCDATA2<>+0x078(SB)/8, $0x0000000000000000
GLOBL LCDATA2<>(SB), RODATA|NOPTR, $128

TEXT ·__lsh256_sse2_update(SB), NOSPLIT, $128-32
// stack size: 96
TEXT ·__lsh256_sse2_update(SB), NOSPLIT, $96-32
MOVQ ctx+0(FP), DI
MOVQ data_base+8(FP), SI
MOVQ data_len+16(FP), DX
// data_cap+24

// stack 96
MOVQ SP, BP
ADDQ $16, SP
ANDQ $-16, SP
MOVQ BP, 96(SP)
LEAQ LCDATA2<>(SB), BP

WORD $0x4f8b; BYTE $0x10 // mov ecx, dword [rdi + 16]
Expand Down Expand Up @@ -1040,7 +1036,6 @@ LBB1_68:
WORD $0x4f89; BYTE $0x10 // mov dword [rdi + 16], ecx

LBB1_69:
MOVQ 96(SP), SP
RET

LBB1_12:
Expand Down Expand Up @@ -1169,15 +1164,11 @@ LBB1_34:
JMP LBB1_36


TEXT ·__lsh256_sse2_final(SB), NOSPLIT, $80-16
// stack size: 64
TEXT ·__lsh256_sse2_final(SB), NOSPLIT, $64-16
MOVQ ctx+0(FP), DI
MOVQ hashval+8(FP), SI

// stack size: 64
MOVQ SP, BP
ADDQ $16, SP
ANDQ $-16, SP
MOVQ BP, 64(SP)
LEAQ LCDATA2<>(SB), BP

LONG $0x10478b44 // mov r8d, dword [rdi + 16]
Expand Down Expand Up @@ -1633,6 +1624,4 @@ LBB2_14:
LONG $0xef0f4166; BYTE $0xc5 // pxor xmm0, xmm13
LONG $0x0e7f0ff3 // movdqu oword [rsi], xmm1
LONG $0x467f0ff3; BYTE $0x10 // movdqu oword [rsi + 16], xmm0

MOVQ 64(SP), SP
RET
4 changes: 2 additions & 2 deletions lsh256/asm_amd64_ssse3.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ DATA LCDATA2<>+0x050(SB)/8, $0x1359621b185fe69e
DATA LCDATA2<>+0x058(SB)/8, $0x1a116870263fccb2
GLOBL LCDATA2<>(SB), RODATA|NOPTR, $96

TEXT ·__lsh256_ssse3_update(SB), NOSPLIT, $32
TEXT ·__lsh256_ssse3_update(SB), NOSPLIT, $0-32
MOVQ ctx+0(FP), DI
MOVQ data_base+8(FP), SI
MOVQ data_len+16(FP), DX
Expand Down Expand Up @@ -708,7 +708,7 @@ LBB1_34:
JMP LBB1_36


TEXT ·__lsh256_ssse3_final(SB), NOSPLIT, $16
TEXT ·__lsh256_ssse3_final(SB), NOSPLIT, $0-16
MOVQ ctx+0(FP), DI
MOVQ hashval_base+8(FP), SI

Expand Down
13 changes: 4 additions & 9 deletions lsh512/asm_amd64_avx2.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include "textflag.h"

TEXT ·__lsh512_avx2_init(SB), NOSPLIT, $16
TEXT ·__lsh512_avx2_init(SB), NOSPLIT, $0-16
MOVQ ctx+0(FP), DI
MOVQ algtype+8(FP), SI

Expand Down Expand Up @@ -84,17 +84,13 @@ DATA LCDATA2<>+0x0e0(SB)/8, $0x0605040302010007
DATA LCDATA2<>+0x0e8(SB)/8, $0x080f0e0d0c0b0a09
GLOBL LCDATA2<>(SB), RODATA|NOPTR, $240

TEXT ·__lsh512_avx2_update(SB), NOSPLIT, $224-32
// stack size: 192
TEXT ·__lsh512_avx2_update(SB), NOSPLIT, $192-32
MOVQ ctx+0(FP), DI
MOVQ data_base+8(FP), SI
MOVQ data_len+16(FP), DX
// data_cap+24

// stack size: 192
MOVQ SP, BP
ADDQ $32, SP
ANDQ $-32, SP
MOVQ BP, 192(SP)
LEAQ LCDATA2<>(SB), BP

WORD $0x4f8b; BYTE $0x10 // mov ecx, dword [rdi + 16]
Expand Down Expand Up @@ -648,7 +644,6 @@ LBB1_68:
WORD $0x4f89; BYTE $0x10 // mov dword [rdi + 16], ecx

LBB1_69:
MOVQ 192(SP), SP
VZEROUPPER
RET

Expand Down Expand Up @@ -789,7 +784,7 @@ LBB1_34:
JE LBB1_41
JMP LBB1_36

TEXT ·__lsh512_avx2_final(SB), NOSPLIT, $16
TEXT ·__lsh512_avx2_final(SB), NOSPLIT, $0-16
MOVQ ctx+0(FP), DI
MOVQ hashval+8(FP), SI

Expand Down
21 changes: 5 additions & 16 deletions lsh512/asm_amd64_sse2.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include "textflag.h"

TEXT ·__lsh512_sse2_init(SB), NOSPLIT, $16
TEXT ·__lsh512_sse2_init(SB), NOSPLIT, $0-16
MOVQ ctx+0(FP), DI
MOVQ algtype+8(FP), SI

Expand Down Expand Up @@ -77,17 +77,13 @@ DATA LCDATA2<>+0x070(SB)/8, $0x6cc37895f4ad9e70
DATA LCDATA2<>+0x078(SB)/8, $0x448304c8d7f3f4d5
GLOBL LCDATA2<>(SB), RODATA|NOPTR, $128

TEXT ·__lsh512_sse2_update(SB), NOSPLIT, $320-32
// stack size: 288
TEXT ·__lsh512_sse2_update(SB), NOSPLIT, $288-32
MOVQ ctx+0(FP), DI
MOVQ data_base+8(FP), SI
MOVQ data_len+16(FP), DX
// data_cap+24

// stack size: 288
MOVQ SP, BP
ADDQ $16, SP
ANDQ $-16, SP
MOVQ BP, 288(SP)
LEAQ LCDATA2<>(SB), BP

WORD $0x4f8b; BYTE $0x10 // mov ecx, dword [rdi + 16]
Expand Down Expand Up @@ -1467,7 +1463,6 @@ LBB1_69:
WORD $0x4f89; BYTE $0x10 // mov dword [rdi + 16], ecx

LBB1_70:
MOVQ 288(SP), SP
RET

LBB1_12:
Expand Down Expand Up @@ -1595,15 +1590,11 @@ LBB1_34:
JE LBB1_41
JMP LBB1_36

TEXT ·__lsh512_sse2_final(SB), NOSPLIT, $288-16
// Stack size: 272
TEXT ·__lsh512_sse2_final(SB), NOSPLIT, $272-16
MOVQ ctx+0(FP), DI
MOVQ hashval+8(FP), SI

// Stack size: 272
MOVQ SP, BP
ADDQ $16, SP
ANDQ $-16, SP
MOVQ BP, 272(SP)
LEAQ LCDATA2<>(SB), BP

LONG $0x10478b44 // mov r8d, dword [rdi + 16]
Expand Down Expand Up @@ -2265,6 +2256,4 @@ LBB2_14:
LONG $0x5e7f0ff3; BYTE $0x10 // movdqu oword [rsi + 16], xmm3
LONG $0x567f0ff3; BYTE $0x20 // movdqu oword [rsi + 32], xmm2
LONG $0x7f0f44f3; WORD $0x3046 // movdqu oword [rsi + 48], xmm8

MOVQ 272(SP), SP
RET
19 changes: 4 additions & 15 deletions lsh512/asm_amd64_ssse3.s
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,13 @@ DATA LCDATA2<>+0x090(SB)/8, $0x6cc37895f4ad9e70
DATA LCDATA2<>+0x098(SB)/8, $0x448304c8d7f3f4d5
GLOBL LCDATA2<>(SB), RODATA|NOPTR, $160

TEXT ·__lsh512_ssse3_update(SB), NOSPLIT, $320-32
// stack size: 288
TEXT ·__lsh512_ssse3_update(SB), NOSPLIT, $288-32
MOVQ ctx+0(FP), DI
MOVQ data_base+8(FP), SI
MOVQ data_len+16(FP), DX
// data_cap+24

// stack size: 288
MOVQ SP, BP
ADDQ $16, SP
ANDQ $-16, SP
MOVQ BP, 288(SP)
LEAQ LCDATA2<>(SB), BP

WORD $0x4f8b; BYTE $0x10 // mov ecx, dword [rdi + 16]
Expand Down Expand Up @@ -1138,7 +1134,6 @@ LBB1_69:
WORD $0x4f89; BYTE $0x10 // mov dword [rdi + 16], ecx

LBB1_70:
MOVQ 288(SP), SP
RET

LBB1_14:
Expand Down Expand Up @@ -1266,15 +1261,11 @@ LBB1_36:
JE LBB1_43
JMP LBB1_38

TEXT ·__lsh512_ssse3_final(SB), NOSPLIT, $288-16
// Stack size: 272
TEXT ·__lsh512_ssse3_final(SB), NOSPLIT, $272-16
MOVQ ctx+0(FP), DI
MOVQ hashval+8(FP), SI

// Stack size: 272
MOVQ SP, BP
ADDQ $16, SP
ANDQ $-16, SP
MOVQ BP, 272(SP)
LEAQ LCDATA2<>(SB), BP

LONG $0x10478b44 // mov r8d, dword [rdi + 16]
Expand Down Expand Up @@ -1785,6 +1776,4 @@ LBB2_14:
LONG $0x5e7f0ff3; BYTE $0x10 // movdqu oword [rsi + 16], xmm3
LONG $0x6e7f0ff3; BYTE $0x20 // movdqu oword [rsi + 32], xmm5
LONG $0x7f0f44f3; WORD $0x3066 // movdqu oword [rsi + 48], xmm12

MOVQ 272(SP), SP
RET

0 comments on commit 5adc6be

Please sign in to comment.