src/mos65/bigint.a65

;   bigint: variable precision integers

;---------------------------------------------------------------------

;   Parameters specified in the documentation for each function are
;   annotated as follows.
;     (p) - preserved: value at exit is the same at entry
;     (w) - written: value at exit is an intended output value
;     (d) - destroyed: value at exit may be different from value at entry
;           and is not useful
;   Flags should be considered destroyed unless otherwise specified.
;   * For pointers (usually in the zero page), two letters are given: the
;   first for the pointer itself and the second for the contents of the
;   buffer to which it points.
;   * Pointers are also given a suffix of subscript 0 or 1 to indicate if
;   their indexing is 0-based (`ptr₀` points to the beginning of the
;   buffer) or 1-based (`ptr₁` points to the byte before the beginning of
;   the buffer). The latter is often used because it's easier and more
;   efficient to loop through such a buffer on the 6502.

;---------------------------------------------------------------------

;   Bigints are stored as a length byte (≥1) followed by the two's
;   complement binary integer. On the 6502 the value is big-endian format.
;   The values are normalized: they take up no more bytes than necessary to
;   express the number with the high bit of the MSB clear/set for
;   positive/negative numbers. The MSB is a sign extension byte of $00
;   (positive) or $FF (negative) only if the high bit of the next byte
;   indicates the wrong sign.
;
;   We use big-endian format, despite being the opposite of native 6502
;   order, because we generally want to loop from the LSB to the MSB and
;   looping towards zero lets us use the zero compare inherent in the DEY
;   instruction rather than having to do a separate CMP against a value
;   stored elsewhere.

;---------------------------------------------------------------------
;   Routines that do not need memory storage

            include "src/mos65/qhex.a65"

;---------------------------------------------------------------------
;   Common zero-page storage.

;   Temporary storage bytes that may be destroyed by subroutine calls.
;   Since subroutines are allowed to destroy these at will, they may
;   be used freely, but only between subroutine calls.
temp1:      zds     1
temp2:      zds     1

;   Sign byte for numerical processing; either $00 or $FF. This is shared
;   amongst multiple routines that co-operate on processing a number; it
;   should only be set once by one routine during processing and used as a
;   read-only parameter by all routines called, directly or indirectly, by
;   that routine.
sign:       zds 1               ; saved sign of input

;   Routines that need "buffers," or generic pointers to memory with
;   an optional length, use a standard set defined here. `buf0` is the
;   "innermost" pointer/len to be used as a parameter for routines
;   that do not call other buffer-using routines. Routines that call
;   that should, where they can't use the higher values (`buf1`, etc.)
;   as their parameters.
;
;   Some routines also need a "scratch" buffer for temporary storage,
;   generally allocated by the caller based on lengths of other
;   parameters. This has its own special pointer and length.
;
;   Where routines use multiple buffers, typically these are ouput,
;   input and scratch in order of increasing buffer number.
;
;   In all cases it's carefully documented whether the pointer, the
;   length and the buffer itself are preserved, overwritten or
;   destroyed, per the coding at the top of this file.

bufSptr:    zds  2              ; pointer to a buffer
bufSlen:    zds  1              ; length of that buffer
buf0ptr:    zds  2
buf0len:    zds  1
buf1ptr:    zds  2
buf1len:    zds  1
buf2ptr:    zds  2
buf2len:    zds  1
buf3ptr:    zds  2
buf3len:    zds  1

;---------------------------------------------------------------------
;   Read the ASCII hex two's complement representation of an integer
;   and convert it to a bigint. The first bit is the sign, i.e.,
;   `FF00` will be read as -256 decimal; a minus prefix is not
;   allowed.
;
;            X, Y: (d)
;               A: (d) buf0ptr length; 1 <= A <= 255
;         buf0ptr: (d) pointer to input char buffer
;       [buf0ptr]: (p) input chars
;         buf1ptr: (d) output buffer, must be length (A+1)/2.
;       [buf1ptr]: (w) bigint output
;
bi_readhex  ldx #0              ; constant for indirect addressing
            ;   Start by skipping past any leading '0's.
            tay                 ; save length
.stripl0    lda (buf0ptr,x)
            cmp #'0'
            bne .conv           ; no (more) leading zeros; start conversion
            dey                 ; reduce length
            beq .conv0          ; but if it's last digit, convert the 0 anyway
            incw buf0ptr        ; move past leading 0
            clc
            bcc .stripl0
            ;   Conversion, two input digits at a time
.conv0      iny
.conv       tya                 ; copy input length to A
            dey                 ; length -1 = last byte of input buffer
            clc
            ror                 ; output length is 1/2 input length
            adc #0              ; plus 1 if input length is odd
            sta (buf1ptr,x)     ; store output length
            adc buf1ptr         ; set buf1ptr to end of buffer
            sta buf1ptr
            bcc .nextbyte
            inc buf1ptr+1
.nextbyte   lda (buf0ptr),y     ; first char for this byte of output buffer
            jsr qdigit          ; convert ASCII digit to binary
            bmi .err            ; bad digit, error
            cmp #$10
            bcs .err            ; >=16, error
            sta (buf1ptr,x)
            dey                 ; second char for this byte of output buffer
            bmi .done           ; if input done, return success
            lda (buf0ptr),y
            jsr qdigit          ; convert ASCII digit to binary
            bmi .err            ; bad digit, error
            cmp #$10
            bcs .err            ; >=16, error
            asl                 ; shift up to high nybble
            asl
            asl
            asl
            clc
            adc (buf1ptr,x)
            sta (buf1ptr,x)
            dec buf1ptr
            lda buf1ptr
            cmp #$FF
            bne .nocarry1
            dec buf1ptr+1
.nocarry1   dey
            cpy #$FF
            bne .nextbyte       ; next chars
.done       rts                 ; success; return
.err        brk                 ; readhex error

;---------------------------------------------------------------------

;   In-place multiply by ten of a big-endian integer. There is no check for
;   overflow, so the value must be at least ten times smaller than the
;   maximum value that can fit in the buffer. A good heurstic is that the
;   most significant five bits (for signed integers, four bits for
;   unsigned) should be all zero or all one; any such value will be very
;   close to the maximum/minimum value that will not overflow.
;
;   Like the 6502 ADC/SBC instructions, this does not care whether you are
;   multiplying a signed or unsigned value; you decide which way you want
;   to treat it. This allows you an extra bit of precision if you have
;   separately kept track of the sign of the input; you can safely overflow
;   the sign bit out of the MSB and sign-extend afterwards. For example,
;   input [$FF, $00] results in [$30, $30]. If you were considering your
;   input to be unsigned (65280₁₀) it overflowed and the result is
;   incorrect. But if you were considering your input to be signed
;   (-5320₁₀) only the sign bit "overflowed"; you know the output is
;   negative and can simply sign-extend extend to $FF3030 to have the
;   correct result.
;
;   This requires a scratch buffer of the same length as the number to be
;   multiplied.
;
;          A, Y: (d)
;             X: (p)
;       buf0len: (p)  [buf0ptr₁] length; 1 <= buf0len <= 255
;      buf0ptr₁: (pw) pointer to input/output buffer address - 1
;      bufSptr₁: (pd) pointer to scratch buffer address - 1
;
;   XXX This is often called with small numbers with lots of leading
;   zero bytes. Thus, it might be a significant optimization for
;   positive numbers to work on only the right-hand portion of the
;   buffer from the rightmost zero byte to the end. But for negative
;   numbers to use the same technique they would need to be filled
;   with $FF sign extension bytes or something like that. It's not
;   clear if detection and setting of that should be done by this
;   routine or by the caller.
;
;   This takes about 96 cycles per byte, with 6-12 cycles of overhead. A
;   full 256 byte input takes 24741 cycles. The majority of this, sadly, is
;   the LDA/STA because ROL has no indirect indexed addressing mode. This
;   could be worked around with self-modifying code, but currently it's not
;   worth the effort.
;
bi_x10      ldy buf0len
            ;   Multiply the buffer by 2, also making a copy for later addition
            clc
-           lda (buf0ptr),y
            rol
            sta (buf0ptr),y
            sta (bufSptr),y
            dey
            bne -
            ;   Multiply buffer by 2 again, for ×4
            ldy buf0len
            clc
-           lda (buf0ptr),y
            rol
            sta (buf0ptr),y
            dey
            bne -
            ;   Multiply buffer by 2 again, for ×8
            ldy buf0len
            clc
-           lda (buf0ptr),y
            rol
            sta (buf0ptr),y
            dey
            bne -
            ;   Add the ×2 value, for ×10
            ldy buf0len
            clc
-           lda (bufSptr),y
            adc (buf0ptr),y
            sta (buf0ptr),y
            dey
            bne -
            rts

;   Read the ASCII decimal representation of a positive or negative integer
;   and convert it to a non-normalized (i.e., may have leading sign bytes
;   as the MSBs) bigint. Leading zeros are processed (though obviously with
;   no effect other than slowing the conversion).
;
;   No error checking is done; ouput/beahviour is undefined unless the
;   following requirements are met.
;   • 1 <= length(input) <= 255
;   • All input characters are ASCII digits
;   • sign = $00 or $FF
;   • Output and scratch buffers are large enough to hold the output.
;
;   (The output/scratch buffers need not be large enough to hold a valid
;   sign; the sign is always what is specified as the sign input,
;   regardless of the value of the most significant bit. If the MSB differs
;   from the sign you will need to sign-extend the output with a sign byte
;   to have a valid number with the sign in it.)
;
;   A reasonable heuristic for the scratch/output buffer sizes is half the
;   number of digits in the input, rounded up. In the worst case this will
;   be about 20% larger than necessary, but for up to 22 digit inputs the
;   worst case is only two bytes larger.
;
;     buf1ptr₀: (pp) input chars buffer address
;     buf1len : (p)  input buffer length: 1 <= buf1len <= 255
;         sign: (p)
;     buf0ptr₁: (pw) pointer to output buffer address - 1; length as above
;     buf0len : (p)  output buffer length; see conditions above
;     bufSptr₁: (pd) scratch buffer address - 1; length same as buf0ptr₀
;    curdigit : (d)  temporary storage
;         A, Y: (d)
;            X: (p)
;
;   XXX Check the carry to see if there's anything to propagate,
;   and shortcut if there isn't?
;
bi_read_decdigits:
            ;   Set output buffer to starting value of zero.
            lda #0
            ldy buf0len
.clearbuf   sta (buf0ptr),y
            dey
            bne .clearbuf
            sta curdigit        ; start with most signficant digit (at pos. 0)
            beq .skip10x        ; BRA; skip initial multiply of zeroed buffer

.digit      ;   Multiply current intermediate value (buf0ptr) by ten.
            jsr bi_x10
.skip10x    ;   Add in or subtract the current digit.
            ldy curdigit
            inc curdigit        ; for next loop iteration
            lda (buf1ptr),y     ; load this digit
            eor #$30            ; convert ASCII to binary
            ldy buf0len         ; output LSB index for following add/sub code
            bit sign            ; load high bit of sign into N flag
            bmi .subdigit       ; if negative, go subtract

            ;   Add binary value of digit in A into output
.adddigit   clc
            adc (buf0ptr),y     ; add LSB to new digit
            sta (buf0ptr),y     ;   and store new LSB
.addnext    dey                 ; another byte?
            beq .nextdigit      ; no, carry propagation complete
            lda (buf0ptr),y
            adc #0              ; propagate carry
            sta (buf0ptr),y
            clv
            bvc .addnext        ; BRA

.subdigit   ;   Subtract binary value of digit from output
            sta temp1           ; subtrahend must be in memory
            lda (buf0ptr),y     ; load current LSB
            sec
            sbc temp1           ; calculate new LSB
            sta (buf0ptr),y     ;   and store it
.subnext    dey                 ; propagation done?
            beq .nextdigit      ; yes, move on to next digit
            lda (buf0ptr),y     ; propagate carry
            sbc #0
            sta (buf0ptr),y
            clv
            bvc .subnext        ; BRA; continue propagation

.nextdigit  ;   Propagation complete; on to next digit
            lda curdigit        ; digits remaining
            cmp buf1len
            beq .done           ; none, we're done
            clv
            bvc .digit          ; BRA; continue with next digit
.done       rts

curdigit    zds 1               ; temporary storage

;   Read the ASCII signed decimal representation of an integer and convert
;   it to a bigint. The first character may be a optional `+` or `-` sign;
;   all other characters must be ASCII decimal digits. No error checking of
;   input characters is done; bad input produces an undefined result.
;
;   The temporary buffers pointed to by buf0ptr and bufSptr should be half
;   the length of the input rounded up plus one byte , with a minimum
;   length of 2. (XXX These should probably be allocated and freed by this
;   routine.)
;
;   The ouput buffer will be a 1-byte count of the length followed by that
;   many bytes. XXX figure out how to handle length allocation of this.
;   Probably also allocated by the routine.
;
;              Y: (d)  [buf1ptr] (input) length; 1 <= A <= 255
;       bufSptr₁: (pd) scratch buffer addr - 1; for length see above
;       buf0ptr₁: (dd) temp buffer addr - 1; for length see above
;       buf1ptr₀: (dp) pointer to start of input chars
;       buf2ptr₀: (dw) pointer to output buffer for len+value
;
;            A,X: (d)
;       buf1len : (d)
;
bi_read_dec
            ldx #0              ; constant for indirect addressing, etc.
            ;   Check for and process a `+` or `-` prefix
            stx sign            ; assume positive
            lda (buf1ptr,x)
            cmp #'-'            ; starts with '-' sign?
            bne .checkplus      ; no, check for other things
            dec sign            ; change sign to negative
            bmi .dropsign       ; BRA; remove sign char
.checkplus  cmp #'+'            ; starts with a '+' sign?
            bne .skipzeros      ; no, skip sign char removal
.dropsign   ;   Remove the sign char from the input
            dey                 ; dec [buf1ptr₀] (input) length
            incw buf1ptr        ; move input start forward one char
.skipzeros  ;   Skip past leading zeros (except last char, of course)
            clv                 ; for later BRA
.skipzero   cpy #1              ; on last char of input?
            beq .onedigit       ; yes, special case for single-digit conversion
            lda (buf1ptr,x)
            cmp #'0'            ; leading zero?
            bne .convert        ; no, start conversion
            dey                 ; otherwise dec length
            incw buf1ptr        ;   and move input ptr forward past the '0'
            bvc .skipzero       ; BRA

.onedigit   ;   We have only one char of input left. We special-case 0 because
            ;   it breaks our leading-zero-digit removal during later
            ;   normalization. Also, it may be a common case so fast is nice.
            lda (buf1ptr,x)
            cmp #'0'
            bne .convert        ; not zero, continue with standard conversion
            ldy #0              ; output length byte location
            lda #1              ; output length byte value
            sta (buf2ptr),y
            tya                 ; output value (0) to A
            iny                 ; output value location = 1
            sta (buf2ptr),y
            rts

.convert    ;   ASCII to binary conversion by bi_read_decdigits.
                                ; buf1ptr₁ already points to start of input
            sty buf1len         ; set remaining input length
            tya                 ; calculate the temp/scratch buffer length
            clc                 ;   ...
            ror a               ;   divide by two, leaving old LSbit in carry
            adc #0              ;   round up
            sta buf0len         ;   set output buf size
            jsr bi_read_decdigits

            ;   Normalize by dropping redundant leading sign bytes
            clv                 ; for later BRA
            ldy #1              ; [buf0ptr₁] uses 1-based indexing
.normalize  lda (buf0ptr),y
            cmp sign
            bne .done           ; not a sign extension byte, we're normalized
            iny                 ; check following byte
            lda (buf0ptr),y
            eor sign            ; next char's sign bit same as sign?
            bmi .done           ; no, we're done with normalization, we must
                                ;   keep sign extension byte to preserve sign
            dey                 ; set Y back to start of buf for next iteration
            dec buf0len         ; drop this byte by reducing length by one
            incw buf0ptr        ;   and moving start point up
                                ;   XXX no test case covers this increment
                                ;       not being 16-bit
            bvc .normalize      ; BRA

.done       ;   Produce final result in [buf2ptr₁] with 1-based indexing.
            lda buf0len         ; Store length as
            ldy #0              ;   first byte of
            sta (buf2ptr ),y    ;   [buf2ptr₀]
            ;   Copy normalized value to output buffer
            ldy buf0len
-           lda (buf0ptr),y
            sta (buf2ptr),y     ; assume output len is that of bi_read_udec
            dey
            bne -
            rts