Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions src/crt/i48mulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
assume adl=1

section .text

public __i48mulhu

; UDE:UHL = ((uint96_t)UDE:UHL * (uint96_t)UIY:UBC) >> 48
;
; Unsigned 48x48 -> 96-bit multiply that returns only the upper 48 bits.
; In:   UDE:UHL = x (x_hi:x_lo), UIY:UBC = y (y_hi:y_lo), 24 bits per half
; Out:  UDE:UHL = bits 48-95 of x * y
; BC, IX and IY are restored before returning; flags are clobbered.
;
; The product is assembled from four 24x24 partial products obtained from
; __i48mulu (called with zeroed upper halves):
;   x * y = x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 24) + (x_hi*y_hi << 48)
; The middle sum plus the carry-in from x_lo*y_lo can be 49 bits wide, so the
; value carried into x_hi*y_hi is 25 bits wide. Its bit 24 is kept in the
; frame slot at (ix - 6) and added to UDE at the very end.
; This code assumes, as the calls below always did, that __i48mulu preserves
; BC, IX and IY.
__i48mulhu:
; NOTE: the size/cycle figures previously listed here described the old
; instruction sequence and have to be re-measured.
	push	ix
	push	iy
	push	bc
	ld	ix, 0
	lea	iy, ix			; UIY = 0: zero source for every lea below
	add	ix, sp
	push	de
	push	hl
	; Frame: (ix+3) = y_hi   (ix+0) = y_lo
	;        (ix-3) = x_hi   (ix-6) = x_lo, later bit 24 of the carry-in

	; x_lo * y_lo
	lea	de, iy			; UDE = 0
	call	__i48mulu
	push	de			; hi24(x_lo*y_lo): carry-in for the middle sum

	; x_hi * y_lo
	lea	de, iy
	ld	hl, (ix - 3)
	call	__i48mulu
	push	de			; hi24
	push	hl			; lo24

	; x_lo * y_hi
	lea	de, iy
	ld	bc, (ix + 3)
	ld	hl, (ix - 6)
	call	__i48mulu
	pop	bc			; lo24
	add	hl, bc
	ex	de, hl
	pop	bc			; hi24
	adc	hl, bc			; UHL:UDE = middle sum mod 2^48, CF = its bit 48
	lea	bc, iy			; UBC = 0 (lea leaves CF alone)
	rl	c			; UBC = bit 48 of the middle sum
	ld	(ix - 6), bc		; x_lo is dead now: reuse its slot for that bit

	pop	bc			; hi24(x_lo*y_lo)
	ex	de, hl			; UHL = lo24(middle), UDE = hi24(middle)
	add	hl, bc			; only the carry matters; the low word is dropped
	ex	de, hl			; UHL = hi24(middle)
	lea	bc, iy			; UBC = 0
	adc	hl, bc			; propagate the carry, CF = wrap past 24 bits
	jr	nc, .no_wrap
	inc	(ix - 6)		; a wrap also feeds bit 24 of the carry-in
.no_wrap:
	push	hl			; bits 0-23 of the carry-in

	; x_hi * y_hi
	lea	de, iy
	ld	bc, (ix + 3)
	ld	hl, (ix - 3)
	call	__i48mulu
	pop	bc			; bits 0-23 of the carry-in
	add	hl, bc
	ex	de, hl
	ld	bc, (ix - 6)		; bit 24 of the carry-in (0 or 1)
	adc	hl, bc
	ex	de, hl
	ld	sp, ix
	pop	bc
	pop	iy
	pop	ix
	ret

extern __i48mulu
145 changes: 145 additions & 0 deletions src/crt/imulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
assume adl=1

section .text

public __imulhu

; UHL = ((uint48_t)UHL * (uint48_t)UBC) >> 24
;
; Unsigned 24x24 -> 48-bit multiply that returns only the upper 24 bits.
; In:  UHL, UBC = the two factors
; Out: UHL = bits 24-47 of UHL * UBC
; DE, AF, IX, IY and BC are pushed on entry and restored before returning.
;
; The product is built bytewise (schoolbook) from 8x8 MLT products that are
; accumulated into a 6-byte sum[] array kept on the stack.
; NOTE(review): the 24-bit `add hl, de` instructions below use all of UDE, so
; this depends on MLT leaving the upper byte of DE zero - confirm against the
; eZ80 manual.
__imulhu:
; TODO: Optimize this routine as this is mostly just a copy paste of __i48mulu with some stuff removed.
;
; CC: 118*r(PC)+39*r(SPL)+38*w(SPL)+37
; CC: 117 bytes | 118F + 39R + 38W + 37
; NOTE(review): both CC lines look inherited from __i48mulu - re-measure.
push de
; backup af
push af
push ix
ld ix, 0
add ix, sp ; ix = frame pointer; 0 + sp cannot carry, so CF = 0 from here on

; On stack to get upper byte when needed
push de ; de will also be used to perform the actual multiplication
push hl
push iy
push bc

; bc = a[0], a[1]
ld a, l ; a = b[0]
ld iy, (ix - 5) ; iy = b[1], b[2]

; or a, a ; carry is already cleared
sbc hl, hl ; hl = 0 (relies on CF = 0 left by add ix, sp)
push hl ; upper bytes of sum at -15
; Stack Use:
; (a[] are the bytes of the UIY:UBC pushes, b[] those of the UDE:UHL pushes;
;  only a[0-2] and b[0-2] are factors here, the rest is just saved register
;  content kept to preserve the layout)
; ix-1 : deu b[5]
; ix-2 : d b[4]
; ix-3 : e b[3]
; ix-4 : hlu b[2]
; ix-5 : h b[1]
; ix-6 : l b[0]
; ix-7 : iyu a[5]
; ix-8 : iyh a[4]
; ix-9 : iyl a[3]
; ix-10 : bcu a[2]
; ix-11 : b a[1]
; ix-12 : c a[0]
; ix-13 : sum[5]
; ix-14 : sum[4]
; ix-15 : sum[3]
; ix-16 : sum[2]
; ix-17 : sum[1]
; ix-18 : sum[0]

; ======================================================================
; sum[0-1]

; a[0]*b[0]
ld d, c ; d = a[0]
ld e, a ; e = b[0]
mlt de
push de ; lower bytes of sum at -18

; ======================================================================
; sum[1-2]
ld l, d ; hl will store current partial sum
; (h and hlu are still 0 from sbc hl, hl, so hl = high byte of a[0]*b[0])

; a[1]*b[0]
ld d, b ; d = a[1]
ld e, a ; e = b[0]
mlt de
add hl, de

; a[0]*b[1]
ld d, c ; d = a[0]
ld e, iyl ; e = b[1]
mlt de
add hl, de

ld (ix - 17), hl ; writes sum[1], sum[2] and sum[3]

; ======================================================================
; sum[2-3]
ld hl, (ix - 16) ; hl will store current partial sum

; a[0]*b[2]
ld d, c ; d = a[0]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; a[1]*b[1]
ld d, b ; d = a[1]
ld e, iyl ; e = b[1]
mlt de
add hl, de

; a[2]*b[0]
ld d, (ix - 10) ; d = a[2]
ld e, a ; e = b[0]
mlt de
add hl, de

ld (ix - 16), hl ; writes sum[2], sum[3] and sum[4]

; ======================================================================
; sum[3-4]
ld hl, (ix - 15) ; hl will store current partial sum

; a[1]*b[2]
ld d, b ; d = a[1]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; a[2]*b[1]
ld d, (ix - 10) ; d = a[2]
ld e, iyl ; e = b[1]
mlt de
add hl, de

ld (ix - 15), hl ; writes sum[3], sum[4] and sum[5]

; ======================================================================
; sum[4-5]
ld hl, (ix - 14) ; hl will store current partial sum
; (the upper byte loaded here is a[0] at ix-12; it is never written back)

; a[2]*b[2]
ld d, (ix - 10) ; d = a[2]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; store bytewise so that a[0] at ix-12 (the saved c) is not overwritten
ld (ix - 14), l
ld (ix - 13), h

; clean up stack and restore registers
pop de ; sum[0-2]: the low half of the product, discarded below by pop de
pop hl ; sum[3-5]: the result
pop bc
pop iy

ld sp, ix ; drop the hl/de copies pushed for the frame
pop ix
pop af
pop de
ret
101 changes: 101 additions & 0 deletions src/crt/llmulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
assume adl=1

section .text

public __llmulhu

; BC:UDE:UHL = ((uint128_t)BC:UDE:UHL * (uint128_t)(SP64)) >> 64
;
; Unsigned 64x64 -> 128-bit multiply that returns only the upper 64 bits.
; In:   BC:UDE:UHL = x, y = the 8 bytes on the stack directly above the
;       return address (read at (ix + 9) .. (ix + 16) below)
; Out:  BC:UDE:UHL = bits 64-127 of x * y
; IX and IY are restored before returning; flags are clobbered.
;
; x and y are split into 32-bit halves and four partial products are taken
; with __llmulu:
;   x * y = x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 32) + (x_hi*y_hi << 64)
; The middle sum plus the carry-in from x_lo*y_lo can be 65 bits wide. Its
; bit 64 is counted in (ix - 10) by __llmulhu_add and then placed at bit 32
; of the carry-in that is added to x_hi*y_hi.
; NOTE(review): as before, this presumes __llmulu multiplies BC:UDE:UHL by
; the 64-bit value at the top of the stack, returns the product in
; BC:UDE:UHL and leaves IX and the stacked operand intact - confirm.
__llmulhu:
	push	ix
	push	iy
	ld	ix, -36
	add	ix, sp
	ld	sp, ix			; reserve a 36-byte frame
	lea	ix, ix + 36		; IX = top of the frame
	; Frame layout:
	;   ix-9  .. ix-2   x, little-endian (ix-1 receives the unused BCU byte)
	;   ix-10           number of carries out of bit 63 of the middle sum
	;   ix-18 .. ix-11  64-bit carry-in for the next accumulation
	;   ix-27 .. ix-20  x_hi * y_lo
	;   ix-36 .. ix-29  zero-extended half of y handed to __llmulu

	ld	(ix - 3), bc
	ld	(ix - 6), de
	ld	(ix - 9), hl

	ld	bc, 0
	ld	(ix - 10), c		; no middle carries yet
	ld	(ix - 13), bc		; upper bytes of the carry-in stay zero
	ld	(ix - 30), bc		; upper bytes of the stacked operand stay zero
	ld	c, (ix + 12)
	ld	(ix - 33), bc
	ld	iy, (ix + 9)
	ld	(ix - 36), iy		; stacked operand = y_lo

	; x_lo * y_lo
	ld	c, b			; BC = 0
	ld	d, b
	inc	de
	dec.s	de			; clear the upper byte: UDE = x[3], UHL = x[0-2]
	call	__llmulu
	ld	(ix - 16), bc
	ld	(ix - 19), de
	ld	bc, 0
	ld	(ix - 14), b		; carry-in = (x_lo*y_lo) >> 32, zero-extended

	; x_hi * y_lo
	inc.s	de			; clear the upper byte (D and E are reloaded next)
	ld	d, b
	ld	e, (ix - 2)
	ld	hl, (ix - 5)
	call	__llmulu
	ld	(ix - 21), bc
	ld	(ix - 24), de
	ld	(ix - 27), hl

	ld	c, (ix + 16)
	ld	(ix - 33), c
	ld	iy, (ix + 13)
	ld	(ix - 36), iy		; stacked operand = y_hi

	; x_lo * y_hi
	ld	bc, 0
	inc.s	de
	ld	d, b
	ld	e, (ix - 6)
	ld	hl, (ix - 9)
	call	__llmulu
	lea	iy, ix - 27
	call	__llmulhu_add		; + x_hi*y_lo
	lea	iy, ix - 18
	call	__llmulhu_add		; + (x_lo*y_lo) >> 32
	ld	(ix - 16), bc
	ld	(ix - 19), de
	ld	c, (ix - 10)
	ld	(ix - 14), c		; bit 64 of the middle sum -> bit 32 of the carry-in
	ld	bc, 0

	; x_hi * y_hi
	inc.s	de
	ld	d, b
	ld	e, (ix - 2)
	ld	hl, (ix - 5)
	call	__llmulu
	lea	iy, ix - 18
	call	__llmulhu_add		; + middle sum >> 32 (now 33 bits wide)
	ld	sp, ix
	pop	iy
	pop	ix
	ret

; Private helper of __llmulhu: BC:UDE:UHL += 64-bit little-endian value at (IY).
; similar to __lladd, except iy points to the stack and is destroyed
; In:   BC:UDE:UHL = accumulator, IY -> addend, IX = __llmulhu frame pointer
; Out:  BC:UDE:UHL = sum mod 2^64 (the upper byte of UBC is not meaningful);
;       (ix - 10) is incremented when the sum carried out of bit 63
__llmulhu_add:
	push	bc
	ld	bc, (iy + 0)
	add	hl, bc			; bits 0-23
	ex	de, hl
	ld	bc, (iy + 3)
	adc	hl, bc			; bits 24-47
	ex	de, hl
	pop	bc
	push	hl			; free UHL for the top word; CF is kept intact
	ld	hl, (iy + 6)		; addend bits 48-63 plus one unrelated byte
	inc	hl
	dec.s	hl			; drop the unrelated byte; inc/dec leave CF alone
	inc	bc
	dec.s	bc			; same for the accumulator's top word
	adc	hl, bc			; 17-bit sum: bit 16 = carry out of bit 63
	ld	bc, $FF0000
	add	hl, bc			; CF = bit 16 of the sum; low 16 bits unchanged
	ex	(sp), hl		; UHL = low word again, top sum goes to the stack
	pop	bc			; BC = sum bits 48-63
	ret	nc
	inc	(ix - 10)		; record the carry out of bit 63
	ret

extern __llmulu
42 changes: 42 additions & 0 deletions src/crt/lmulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
assume adl=1

section .text

public __lmulhu

; E:UHL = ((uint64_t)E:UHL * (uint64_t)A:UBC) >> 32
;
; Unsigned 32x32 -> 64-bit multiply that returns only the upper 32 bits.
; In:  E:UHL = first factor, A:UBC = second factor
; Out: E:UHL = bits 32-63 of the product
; IY, BC and the upper two bytes of DE are restored from the values pushed on
; entry; A is not written by this routine.
; NOTE(review): presumably __llmulu multiplies BC:UDE:UHL by the 64-bit value
; at the top of the stack and returns the product in BC:UDE:UHL - confirm.
__lmulhu:
push iy
push de
ld iy, 0
push iy ; upper three bytes of the stacked operand (zero)
ld iyl, a
push iy ; next three bytes: A, 0, 0
push bc ; lowest three bytes: UBC -> stacked operand = A:UBC zero-extended
ld iyl, iyh ; ld iy, 0
lea bc, iy ; BC = 0
inc de
dec.s de ; clear the upper byte of DE
ld d, b ; D = 0, so BC:UDE:UHL = E:UHL zero-extended
call __llmulu
; The upper half of the 64-bit result is moved into place:
; E = B
; UHL = C
; H = UDE
; L = D
; NOTE(review): the next instruction needs IY to still be 0 here, i.e.
; __llmulu must preserve IY - confirm.
add iy, sp ; iy = sp
push de ; the upper byte of DE now sits at (iy - 1)
ld e, (iy - 1) ; H = UDE
ld (iy - 1), c ; UHL = C
pop hl ; UHL = C
ld h, e ; H = UDE
ld l, d ; L = D
ld iyl, b ; E = B
pop bc ; restore the caller's UBC
pop de ; reset SP
pop de ; reset SP
pop de ; DE as pushed on entry (E is replaced next)
ld e, iyl ; E = B
pop iy
ret

extern __llmulu
36 changes: 36 additions & 0 deletions src/crt/smulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
assume adl=1

section .text

public __smulhu

; HL = ((uint32_t)HL * (uint32_t)BC) >> 16
;
; Unsigned 16x16 -> 32-bit multiply that returns only the upper 16 bits.
; In:  HL, BC = the two factors
; Out: HL = bits 16-31 of HL * BC
; AF, DE and BC are pushed on entry and restored before returning.
;
; With HL = H:L and BC = B:C the product is
;   L*C + ((L*B + H*C) << 8) + (H*B << 16)
; A accumulates byte 1 of the product only to generate the carries into HL.
__smulhu:
; CC: 32 bytes
; ADL = 1: 33F + 12R + 9W + 17
; ADL = 0: 33F + 8R + 6W + 17
push af
push de
push bc
ld d, l
ld e, c
mlt de ; L * C
ld a, d ; a = high byte of L * C (the low byte cannot affect the result)
ld d, l
ld e, b
mlt de ; L * B
ld l, b ; hl = H:B
ld b, h ; bc = H:C
mlt bc ; H * C
mlt hl ; H * B
add a, c ; + low byte of H * C, carry goes into the adc below
ld c, b
ld b, 0 ; bc = high byte of H * C
adc hl, bc
add a, e ; + low byte of L * B, carry goes into the adc below
ld c, d ; bc = high byte of L * B (b is still 0)
adc hl, bc ; result is [0, $FFFE]
pop bc
pop de
pop af
ret
Loading
Loading