Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added 48 bit Routines #453

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
63 changes: 63 additions & 0 deletions src/crt/slland.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
; Performs 48 bit ("short long long") bitwise and
;
; Arguments:
; uiy = Most significant bytes of arg 1
; ubc = Least significant bytes of arg 1
; ude = Most significant bytes of arg 2
; uhl = Least significant bytes of arg 2
;
; Returns:
; ude:uhl = ude:uhl & uiy:ubc

assume adl=1

section .text
public __slland
__slland:
	; a and the flags are saved here and restored before returning, so the
	; only registers that change are ude:uhl (the result).
	push af

	; Bytes 0 and 1 of each 24-bit half can be combined register-to-register.

	ld a, d
	and a, iyh
	ld d, a

	ld a, e
	and a, iyl
	ld e, a

	ld a, h
	and a, b
	ld h, a

	ld a, l
	and a, c
	ld l, a

	; The upper byte (bits 16-23) of a 24-bit register is not directly
	; addressable, so spill all four operands and combine those bytes in
	; memory. The pushes happen after the low/high bytes were ANDed, so the
	; spilled de and hl already carry the partial result.
	push de		; after `push hl` below: deu at sp+8
	push iy		;                         iyu at sp+5
	push hl		;                         hlu at sp+2
	sbc hl, hl	; hl = 0 -- carry is clear from `and a, c` above
			; (ld and push leave the flags untouched)
	add hl, sp	; hl = address of the spilled l
	push bc		; bcu lands at hl-1

	; Walk upwards through the spilled upper bytes, 3 bytes per register.
	ld de, 3	; de is free: its value is reloaded by `pop de` below
	dec hl
	ld a, (hl)	; a = bcu
	add hl, de
	and a, (hl)	; a = bcu & hlu
	ld (hl), a	; spilled hlu = result
	add hl, de
	ld a, (hl)	; a = iyu
	add hl, de
	and a, (hl)	; a = iyu & deu
	ld (hl), a	; spilled deu = result

	; Reload: bc and iy come back unchanged, hl and de carry the result.
	pop bc
	pop hl
	pop iy
	pop de

	pop af

	ret
42 changes: 42 additions & 0 deletions src/crt/sllcmpu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; Performs 48 bit ("short long long") unsigned comparison
;
; Arguments:
; uiy = Most significant bytes of arg 1
; ubc = Least significant bytes of arg 1
; ude = Most significant bytes of arg 2
; uhl = Least significant bytes of arg 2
;
; Several of the operands are read back from the stack,
; even though they are also already held in the registers
;
; Returns:
; flags updated according to comparison

assume adl=1

section .text
public __sllcmpu
__sllcmpu:
; 48-bit unsigned compare that reports its result through the Z and carry
; flags only. ix is borrowed as a frame pointer and restored before every
; return; hl and bc are used as scratch and reloaded from the stack.
push ix
ld ix, 0
add ix, sp
; ix now equals sp: ix+0 holds the saved ix and ix+3 the return address, so
; the +15/+18/+21 slots read below lie in the caller's part of the stack.
; NOTE(review): nothing in this routine stores bc/iy/hl there; the inline
; comments assume the caller pushed them in exactly this layout -- confirm
; against the call sites.
; `add ix, sp` added sp to zero, which cannot carry, so carry is clear for
; the first sbc below.

ld hl, (ix+18) ;hl = iy
fundudeone marked this conversation as resolved.
Show resolved Hide resolved
sbc hl, de ; most significant 1 - most significant 2
jq nz, .ne
; High halves are equal: the low halves decide the result.
or a, a
ld hl, 0
add hl, bc ;hl = bc = least significant 1
ld bc, (ix+21) ;bc = hl = least significant 2
sbc hl, bc ; least significant 1 - least significant 2
; The loads and the pop below do not touch the flags, so Z and carry from
; the sbc survive until the return.
ld hl, (ix+21) ; hl = hl, restored
ld bc, (ix+15) ; bc = bc, restored
pop ix
ret z ; don't complement carry if equal
ccf ; carry = NOT borrow of (arg 1 low - arg 2 low)
ret
.ne:
; High halves differ. bc and de were not modified on this path, so only hl
; needs reloading.
ld hl, (ix+21) ; hl = hl, restored
pop ix
ccf ; carry = NOT borrow of (arg 1 high - arg 2 high)
ret
26 changes: 26 additions & 0 deletions src/crt/sllcmpzero.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
; Performs 48 bit ("short long long") comparison with 0
;
; Arguments:
; ude = Most significant bytes of arg 1
; uhl = Least significant bytes of arg 1


; Returns:
; z status flag set if ude:uhl == 0
fundudeone marked this conversation as resolved.
Show resolved Hide resolved

assume adl=1

section .text
public __sllcmpzero
__sllcmpzero:
	; Check the least significant half first.
	; `add hl, bc` followed by `sbc hl, bc` (with carry cleared in between)
	; leaves hl unchanged and sets Z only when uhl == 0; bc is only read.
	add hl, bc
	or a, a
	sbc hl, bc
	ret nz
	; Check the most significant half.
	; uhl is zero here, so hl itself serves as the zero to subtract ude from.
	; Carry is already clear: the sbc above produced zero, which means no
	; borrow occurred, so no second `or a, a` is needed.
	sbc hl, de	; 0 - de, z set if de = 0
	add hl, de	; restore hl (16/24-bit add leaves Z untouched)
	ret

31 changes: 31 additions & 0 deletions src/crt/slldivuremu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
assume adl=1

section .text
public __slldivu
public __sllremu

; Quotient in ude:uhl, remainder (with remu) on stack, then loaded into ude:uhl
; Entry points for 48-bit unsigned divide (__slldivu) and remainder
; (__sllremu). Both call __slldvrmu and then unwind what it left behind.
; __slldvrmu returns with ix pointing at the return address it just used
; (3 bytes above its saved ix) and with the caller's ix, af and the
; interrupt-state af still stored just below sp.
__slldivu:
call __slldvrmu
; Step ix back over the saved ix, saved af and saved interrupt af (9 bytes)
; and point sp there so they can be popped by the shared cleanup.
lea ix, ix-9
ld sp, ix
; Quotient is already in ude:uhl.
jr __slldivuremu.cleanup
__sllremu:
call __slldvrmu
lea ix, ix-9
ld sp, ix
; ix now has the same value it had in __slldvrmu when the remainder was
; stored with `ld (ix-18), hl` / `ld (ix-27), hl`, so the same offsets
; fetch it back. Both slots lie below sp at this point.
ld de, (ix-18)
ld hl, (ix-27)
__slldivuremu.cleanup:
; First pop: the af pushed right after `ld a, i` in __slldvrmu, whose P/V
; flag recorded whether interrupts were enabled on entry.
pop af
jp po, .skipEI
ei
.skipEI:
; Second pop: the af saved on entry to __slldvrmu.
pop af
pop ix
; Discard the stale return address pushed by `call __slldvrmu` above, so
; that ret goes back to whoever called __slldivu / __sllremu.
inc sp ; skip over old return address
inc sp
inc sp
ret

extern __slldvrmu
136 changes: 136 additions & 0 deletions src/crt/slldvrmu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
; Performs 48 bit ("short long long") division and modulo
;
; Arguments:
; ude = Most significant bytes of arg 1 (numerator)
; uhl = Least significant bytes of arg 1
; uiy = Most significant bytes of arg 2 (denominator)
; ubc = Least significant bytes of arg 2
;
; Returns (potentially loaded into other registers by slldivuremu):
; ude:uhl quotient as well as remainder on the stack

assume adl=1

section .data

section .text
public __slldvrmu

; Shared worker for __slldivu / __sllremu: byte-at-a-time restoring
; (shift-and-subtract) division of ude:uhl by uiy:ubc.
; It returns with the quotient in ude:uhl, the remainder stored below sp,
; and the saved ix / af / interrupt state still on the stack for the caller
; to unwind. Interrupts are disabled on return.
__slldvrmu:
push ix
ld ix, 0
add ix, sp
;backup af
push af
;backup interrupt
ld a, i ; P = IEF2
di
push af

; Save the alternate register set; hl' is zeroed after being pushed so it
; can hold the high half of the remainder.
exx
push hl
push de
ld hl, 0
push bc
exx

push de
push hl
push iy
push bc

;Stack Use (offsets relative to ix as set up above, before the byte loop
;starts decrementing it):
; ix+5 : return address
; ix+4 : return address
; ix+3 : return address
; ix+2 : saved ix
; ix+1 : saved ix
; ix : saved ix
; ix-1 : 0 (upper byte of the af push)
; ix-2 : a
; ix-3 : f
; ix-4 : 0 (upper byte of the af push)
; ix-5 : a interrupt stuff
; ix-6 : f interrupt stuff
; ix-7 : hlu'
; ix-8 : h'
; ix-9 : l'
; ix-10: deu'
; ix-11: d'
; ix-12: e'
; ix-13: bcu'
; ix-14: b'
; ix-15: c'
; ix-16: deu n[5]
; ix-17: d n[4]
; ix-18: e n[3]
; ix-19: hlu n[2]
; ix-20: h n[1]
; ix-21: l n[0]
; ix-22: iyu d[5]
; ix-23: iyh d[4]
; ix-24: iyl d[3]
; ix-25: bcu d[2]
; ix-26: b d[1]
; ix-27: c d[0]

; Set up for c quotient, uhl':uhl remainder, and ude':ude denom
ld de, (ix-27) ; ude = low denom

exx
lea de, iy+0 ; ude' = iy = high denom
exx

ld bc, $000600 ; b = byte counter
; NOTE(review): iy is not read again before `pop iy` restores it, so the
; next load looks like a dead store -- confirm before removing.
ld iy, 0
ld hl, 0
.byteLoop:
; One pass per numerator byte, most significant first. ix is decremented
; at the end of each pass, so (ix-16) walks from n[5] down to n[0].
exx
ld b, 8 ; b' = bit counter
ld a, (ix-16); a = relevant numerator byte
.bitLoop:
exx ; loop entered with alt registers as b' is bit counter, exx here to not be in alt
sla c ; q<<1
; r<<1 and r[0] = n[b]
; rla also shifts the bit that fell out of c into bit 0 of a; with only
; eight shifts per byte that bit never reaches bit 7, so it never enters r.
rla ; c = n[b]
adc hl, hl ; low r += low r + n[b]
exx
adc hl, hl ; high r += high r + c from low r
exx
; r - d
; NOTE(review): the carry out of the high-half shift above is the
; borrow-in here, so this relies on r<<1 never overflowing 48 bits.
sbc hl, de ; low r -= low d
exx
sbc hl, de ; high r -= high d
exx
jr nc, .greaterEqual
; restore if r<d
add hl, de ; restore low
exx
adc hl, de ; restore high
; Still in the alternate set here, so djnz counts b' (bits).
djnz .bitLoop
jr .postBit
.greaterEqual:
; keep r -= d, and update quotient
inc c
exx
djnz .bitLoop
.postBit:
exx
ld (ix-16), c ; now that numerator byte isn't needed, overwrite with quotient
dec ix
; Main set here, so djnz counts b (bytes).
djnz .byteLoop

;finish and clean up
; ix is now 6 lower than in the stack table above, so (ix-27) and (ix-18)
; below address the table's ix-33 and ix-24. Both are below sp when
; written; interrupts were disabled by the `di` above.

pop bc
ld (ix-27), hl ; remainder low, stored 6 bytes below bc's old slot
pop iy
pop hl ; hl = lower quotient
pop de ; de = upper quotient
exx
ld (ix-18), hl ; remainder high, stored in iy's old (already popped) slot
pop bc
pop de
pop hl
exx

; ix+9 here equals the table's ix+3, i.e. the return address. The saved ix,
; af and interrupt af are deliberately left in place below it.
lea ix, ix+9 ; jump past stuff remaining on the stack
ld sp, ix ; sp = return address
ret ; cleanup finished in slldivuremu