-
-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added 48 bit Routines #453
Open
fundudeone
wants to merge
22
commits into
CE-Programming:master
Choose a base branch
from
fundudeone:master
base: master
Could not load branches
Branch not found: {{ refName }}
Could not load tags
Nothing to show
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 12 commits
Commits
Show all changes
22 commits
Select commit
Hold shift + click to select a range
23079e3
Added 48 bit Routines
fundudeone ea4461c
Fixed and optimized sllnot
fundudeone 262100f
Updated sllmulu
fundudeone fe47158
slland/or/xor now uses iy for frame pointer
fundudeone bb6b56a
Update sllcmpu
fundudeone a55233e
Improved sllcmpzero
fundudeone 6fcb615
Updated sllmulu (again)
fundudeone c59e627
Rewrote sllshl
fundudeone a42b949
Update sllshru/s
fundudeone d2d9bfc
Merge branch 'master' into master
fundudeone ddc833e
Added slldvrmu
fundudeone 1637512
Merge branch 'master' of https://github.com/fundudeone/toolchain
fundudeone db29708
Renamed Files to Match New Clang
fundudeone 9157a1f
Changed i48cmpu to no longer use arguments from stack
fundudeone 7ab68d3
Optimized i48cmpu
fundudeone 2744d4c
Corrected how i48dvrmu Outputs
fundudeone 08cb04c
Added i48cmps
fundudeone bd5c933
Added routines for i48 bit manipulation intrensics
fundudeone 092c1a3
i48cmpzero now outputs correct sign flag
fundudeone 1b0c873
Added i48neg, i48divs, and i48rems
fundudeone f6a61e3
Optimized large right shifts
fundudeone 2974b43
Decreased size and increased speed of division related routines
fundudeone File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
; Performs 48 bit ("short long long") bitwise and | ||
; | ||
; Arguments: | ||
; uiy = Most significant bytes of arg 1 | ||
; ubc = Least significant bytes of arg 1 | ||
; ude = Most significant bytes of arg 2 | ||
; uhl = Least significant bytes of arg 2 | ||
; | ||
; Returns: | ||
; ude:uhl = ude:uhl & uiy:ubc | ||
|
||
assume adl=1 | ||
|
||
section .text | ||
public __slland | ||
__slland: | ||
; Computes ude:uhl = ude:uhl & uiy:ubc. | ||
; a and the flags are saved here and restored by the final pop af; | ||
; iy and bc are pushed and popped unchanged, so only de and hl differ on return. | ||
push af | ||
|
||
; AND the low and high bytes of each 24-bit register; these are the bytes | ||
; that can be addressed directly as 8-bit registers (d/e, h/l, iyh/iyl, b/c). | ||
|
||
ld a, d | ||
and a, iyh | ||
ld d, a | ||
|
||
ld a, e | ||
and a, iyl | ||
ld e, a | ||
|
||
ld a, h | ||
and a, b | ||
ld h, a | ||
|
||
ld a, l | ||
and a, c | ||
ld l, a | ||
|
||
; Spill the partially computed values so the upper bytes can be reached in memory. | ||
; iy becomes a frame pointer. With 3-byte pushes (adl=1) the layout is: | ||
; iy+0..iy+2 = saved iy (arg 1 high), upper byte at iy+2 | ||
; iy-3..iy-1 = de, upper byte at iy-1 | ||
; iy-6..iy-4 = hl, upper byte at iy-4 | ||
; iy-9..iy-7 = bc, upper byte at iy-7 | ||
push iy | ||
ld iy, 0 | ||
add iy, sp | ||
push de | ||
push hl | ||
push bc | ||
|
||
; AND the upper bytes in place on the stack | ||
|
||
ld a, (iy-1) ; a = u[de] (modified) | ||
and a, (iy+2) ; a &= u[iy] | ||
ld (iy-1), a ; u[de] (modified) = a | ||
|
||
ld a, (iy-4) ; a = u[hl] (modified) | ||
and a, (iy-7) ; a &= u[bc] | ||
ld (iy-4), a ; u[hl] (modified) = a | ||
|
||
; Load values back into registers and clean up stack | ||
; (pops mirror the pushes above, so iy gets its original value back last) | ||
pop bc | ||
pop hl | ||
pop de | ||
pop iy | ||
|
||
pop af | ||
|
||
ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
; Performs 48 bit ("short long long") unsigned comparison | ||
; | ||
; Arguments: | ||
; uiy = Most significant bytes of arg 1 | ||
; ubc = Least significant bytes of arg 1 | ||
; ude = Most significant bytes of arg 2 | ||
; uhl = Least significant bytes of arg 2 | ||
; | ||
; Most of the values found in these are used off the stack, | ||
; although they are also already in the registers | ||
; | ||
; Returns: | ||
; flags updated according to comparison | ||
|
||
assume adl=1 | ||
|
||
section .text | ||
public __sllcmpu | ||
__sllcmpu: | ||
; Compares the high halves first and only compares the low halves when the | ||
; high halves are equal. hl and bc are reloaded from the stack before returning. | ||
; NOTE(review): the offsets ix+15 / ix+18 / ix+21 assume the caller has already | ||
; pushed bc, iy and hl at those positions; that layout is not visible here, | ||
; confirm against the call site. | ||
push ix | ||
ld ix, 0 | ||
add ix, sp | ||
|
||
ld hl, (ix+18) ;hl = iy | ||
fundudeone marked this conversation as resolved.
Show resolved
Hide resolved
|
||
; NOTE(review): no explicit carry clear precedes this sbc; presumably it relies | ||
; on `add ix, sp` above (0 + sp cannot carry) leaving carry clear - TODO confirm. | ||
sbc hl, de ; most significant 1 - most significant 2 | ||
jq nz, .ne | ||
or a, a | ||
ld hl, 0 | ||
add hl, bc ;hl = bc = least significant 1 | ||
ld bc, (ix+21) ;bc = hl = least significant 2 | ||
sbc hl, bc ; least significant 1 - least significant 2 | ||
; the loads and pop below do not touch the flags set by the sbc | ||
ld hl, (ix+21) ; hl = hl, restored | ||
ld bc, (ix+15) ; bc = bc, restored | ||
pop ix | ||
ret z ; don't complement carry if equal | ||
ccf | ||
ret | ||
.ne: | ||
; high halves differ: bc was never modified on this path, only hl needs restoring | ||
ld hl, (ix+21) ; hl = hl, restored | ||
pop ix | ||
ccf | ||
ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
; Performs 48 bit ("short long long") comparison with 0 | ||
; | ||
; Arguments: | ||
; ude = Most significant bytes of arg 1 | ||
; uhl = Least significant bytes of arg 1 | ||
|
||
|
||
; Returns: | ||
; z status flag set if ude:uhl == 0 | ||
fundudeone marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
assume adl=1 | ||
|
||
section .text | ||
public __sllcmpzero | ||
__sllcmpzero: | ||
; Sets z only when both uhl and ude are zero. hl, de and bc hold their | ||
; original values on return; bc is used only as a scratch operand. | ||
;check lsb's == 0 | ||
; add then subtract bc leaves hl unchanged; the sbc sets z only if hl == 0 | ||
add hl, bc | ||
or a, a | ||
sbc hl, bc | ||
ret nz | ||
;check msb's == 0 | ||
fundudeone marked this conversation as resolved.
Show resolved
Hide resolved
|
||
; hl is known to be 0 here because the ret nz above was not taken | ||
or a, a | ||
sbc hl, de ; 0 - de, z set if de = 0 | ||
; the add must leave z untouched for the result to survive | ||
; (ADD HL,rr does not modify z on Z80-family CPUs) | ||
add hl, de ; restore hl | ||
ret | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
assume adl=1 | ||
|
||
section .text | ||
public __slldivu | ||
public __sllremu | ||
|
||
; Quotient in ude:uhl, remainder (with remu) on stack, then loaded into ude:uhl | ||
__slldivu: | ||
; Unsigned 48-bit divide: __slldvrmu leaves the quotient in ude:uhl, so only | ||
; the shared cleanup is needed. | ||
call __slldvrmu | ||
; __slldvrmu returns with ix pointing at the return-address slot it just used; | ||
; ix-9 is the interrupt-state af it pushed, so sp is moved back onto that frame. | ||
lea ix, ix-9 | ||
ld sp, ix | ||
jr __slldivuremu.cleanup | ||
__sllremu: | ||
; Unsigned 48-bit remainder: same as above, then replace the quotient in | ||
; ude:uhl with the remainder that __slldvrmu stored in memory. | ||
call __slldvrmu | ||
lea ix, ix-9 | ||
ld sp, ix | ||
; ix now has the same value __slldvrmu used when it stored the remainder, | ||
; so the same offsets read it back. Both locations lie below sp; interrupts | ||
; were disabled by the di in __slldvrmu and are only re-enabled in the cleanup. | ||
ld de, (ix-18) | ||
ld hl, (ix-27) | ||
__slldivuremu.cleanup: | ||
; First pop: the af saved after `ld a, i` in __slldvrmu (P/V = IEF2). | ||
; Parity odd means interrupts were disabled on entry, so ei is skipped. | ||
pop af | ||
jp po, .skipEI | ||
ei | ||
.skipEI: | ||
; Second pop: the caller's original af, then the caller's ix. | ||
pop af | ||
pop ix | ||
inc sp ; skip over old return address | ||
inc sp | ||
inc sp | ||
; returns directly to the caller of __slldivu / __sllremu | ||
ret | ||
|
||
extern __slldvrmu |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
; Performs 48 bit ("short long long") division and modulo | ||
; | ||
; Arguments: | ||
; ude = Most significant bytes of arg 1 (numerator) | ||
; uhl = Least significant bytes of arg 1 | ||
; uiy = Most significant bytes of arg 2 (denominator) | ||
; ubc = Least significant bytes of arg 2 | ||
; | ||
; Returns (potentially loaded into other registers by slldivuremu): | ||
; ude:uhl quotient as well as remainder on the stack | ||
|
||
assume adl=1 | ||
|
||
section .data | ||
|
||
section .text | ||
public __slldvrmu | ||
|
||
__slldvrmu: | ||
; Byte-at-a-time restoring division of ude:uhl by uiy:ubc. | ||
; On return: ude:uhl = quotient, bc / iy / alternate registers restored, | ||
; remainder left in memory for __sllremu, interrupts still disabled, and the | ||
; saved ix and both af copies still on the stack for the caller's cleanup. | ||
push ix | ||
ld ix, 0 | ||
add ix, sp | ||
;backup af | ||
push af | ||
;backup interrupt | ||
ld a, i ; P = IEF2 | ||
di | ||
push af | ||
|
||
; save the alternate registers; hl' is zeroed here to become the high remainder | ||
exx | ||
push hl | ||
push de | ||
ld hl, 0 | ||
push bc | ||
exx | ||
|
||
push de | ||
push hl | ||
push iy | ||
push bc | ||
|
||
;Stack Use (offsets relative to ix as set up above, before the byte loop moves it): | ||
; ix+5..ix+3 : return address | ||
; ix+2 : saved ix (upper) | ||
; ix+1 : saved ix (high) | ||
; ix : saved ix (low) | ||
; ix-1 : 0 | ||
; ix-2 : a | ||
; ix-3 : f | ||
; ix-4 : 0 | ||
; ix-5 : a interrupt stuff | ||
; ix-6 : f interrupt stuff | ||
; ix-7 : hlu' | ||
; ix-8 : h' | ||
; ix-9 : l' | ||
; ix-10: deu' | ||
; ix-11: d' | ||
; ix-12: e' | ||
; ix-13: bcu' | ||
; ix-14: b' | ||
; ix-15: c' | ||
; ix-16: deu n[5] | ||
; ix-17: d n[4] | ||
; ix-18: e n[3] | ||
; ix-19: hlu n[2] | ||
; ix-20: h n[1] | ||
; ix-21: l n[0] | ||
; ix-22: iyu d[5] | ||
; ix-23: iyh d[4] | ||
; ix-24: iyl d[3] | ||
; ix-25: bcu d[2] | ||
; ix-26: b d[1] | ||
; ix-27: c d[0] | ||
|
||
; Set up for c quotient, uhl':uhl remainder, and ude':ude denom | ||
ld de, (ix-27) ; ude = low denom | ||
|
||
exx | ||
lea de, iy+0 ; ude' = iy = high denom | ||
exx | ||
|
||
ld bc, $000600 ; b = byte counter (6), c = quotient byte (0) | ||
; NOTE(review): iy is not read again before `pop iy` restores it, so this | ||
; load looks unnecessary - confirm before removing. | ||
ld iy, 0 | ||
ld hl, 0 | ||
.byteLoop: | ||
; ix is decremented once per pass, so (ix-16) walks n[5] down to n[0] | ||
exx | ||
ld b, 8 ; b' = bit counter | ||
ld a, (ix-16); a = relevant numerator byte | ||
.bitLoop: | ||
exx ; loop entered with alt registers as b' is bit counter, exx here to not be in alt | ||
sla c ; q<<1 | ||
; r<<1 and r[0] = n[b] | ||
rla ; c = n[b] | ||
adc hl, hl ; low r += low r + n[b] | ||
exx | ||
adc hl, hl ; high r += high r + c from low r | ||
exx | ||
; r - d | ||
; NOTE(review): this sbc consumes the carry out of the high-half adc above, | ||
; so it assumes the shifted remainder never overflows 48 bits. | ||
; TODO confirm behaviour for denominators with bit 47 set. | ||
sbc hl, de ; low r -= low d | ||
exx | ||
sbc hl, de ; high r -= high d | ||
exx | ||
jr nc, .greaterEqual | ||
; restore if r<d | ||
add hl, de ; restore low | ||
exx | ||
adc hl, de ; restore high | ||
djnz .bitLoop | ||
jr .postBit | ||
.greaterEqual: | ||
; keep r -= d, and update quotient | ||
inc c | ||
exx | ||
djnz .bitLoop | ||
.postBit: | ||
; both paths arrive here with the alternate set selected; switch back | ||
exx | ||
ld (ix-16), c ; now that numerator byte isn't needed, overwrite with quotient | ||
dec ix | ||
djnz .byteLoop | ||
|
||
;finish and clean up | ||
; ix is now 6 lower than at entry (one dec per byte), so the offsets below | ||
; no longer line up with the Stack Use table. | ||
|
||
pop bc | ||
ld (ix-27), hl ; remainder low: 6 bytes below bc's old slot (below sp); __sllremu reads it at the same offset | ||
pop iy | ||
pop hl ; hl = lower quotient | ||
pop de ; de = upper quotient | ||
exx | ||
ld (ix-18), hl ; remainder high: lands in the slot iy was popped from; __sllremu reads it at the same offset | ||
pop bc | ||
pop de | ||
pop hl | ||
exx | ||
|
||
lea ix, ix+9 ; step over both saved af copies and saved ix to reach the return address | ||
ld sp, ix ; sp = return address | ||
ret ; cleanup finished in slldivuremu |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This optimization seems valid? -10 bytes, -10F cycles.
If valid, the same pattern can be applied to sllor and sllxor, of course.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry this took me so long to get back to. This makes sense and works with everything I've thrown at it, so go ahead with or and xor.