-
Notifications
You must be signed in to change notification settings - Fork 4
/
bigint.a65
427 lines (402 loc) · 18.5 KB
/
bigint.a65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
; bigint: variable precision integers
;---------------------------------------------------------------------
; Parameters specified in the documentation for each function are
; annotated as follows.
; (p) - preserved: value at exit is the same at entry
; (w) - written: value at exit is an intended output value
; (d) - destroyed: value at exit may be different from value at entry
; and is not useful
; Flags should be considered destroyed unless otherwise specified.
; * For pointers (usually in the zero page), two letters are given: the
; first for the pointer itself and the second for the contents of the
; buffer to which it points.
; * Pointers are also given a suffix of subscript 0 or 1 to indicate if
; their indexing is 0-based (`ptr₀` points to the beginning of the
; buffer) or 1-based (`ptr₁` points to the byte before the beginning of
; the buffer). The latter is often used because it's easier and more
; efficient to loop through such a buffer on the 6502.
;---------------------------------------------------------------------
; Bigints are stored as a length byte (≥1) followed by the two's
; complement binary integer. On the 6502 the value is big-endian format.
; The values are normalized: they take up no more bytes than necessary to
; express the number with the high bit of the MSB clear/set for
; positive/negative numbers. The MSB is a sign extension byte of $00
; (positive) or $FF (negative) only if the high bit of the next byte
; indicates the wrong sign.
;
; We use big-endian format, despite being the opposite of native 6502
; order, because we generally want to loop from the LSB to the MSB and
; looping towards zero lets us use the zero compare inherent in the DEY
; instruction rather than having to do a separate CMP against a value
; stored elsewhere.
;---------------------------------------------------------------------
; Routines that do not need memory storage
include "src/mos65/qhex.a65"
;---------------------------------------------------------------------
; Common zero-page storage.
; Temporary storage bytes that may be destroyed by subroutine calls.
; Since subroutines are allowed to destroy these at will, they may
; be used freely, but only between subroutine calls.
temp1: zds 1
temp2: zds 1
; Sign byte for numerical processing; either $00 or $FF. This is shared
; amongst multiple routines that co-operate on processing a number; it
; should only be set once by one routine during processing and used as a
; read-only parameter by all routines called, directly or indirectly, by
; that routine.
sign: zds 1 ; saved sign of input
; Routines that need "buffers," or generic pointers to memory with
; an optional length, use a standard set defined here. `buf0` is the
; "innermost" pointer/len to be used as a parameter for routines
; that do not call other buffer-using routines. Routines that call
; that should, where they can't use the higher values (`buf1`, etc.)
; as their parameters.
;
; Some routines also need a "scratch" buffer for temporary storage,
; generally allocated by the caller based on lengths of other
; parameters. This has its own special pointer and length.
;
; Where routines use multiple buffers, typically these are ouput,
; input and scratch in order of increasing buffer number.
;
; In all cases it's carefully documented whether the pointer, the
; length and the buffer itself are preserved, overwritten or
; destroyed, per the coding at the top of this file.
bufSptr: zds 2 ; pointer to a buffer
bufSlen: zds 1 ; length of that buffer
buf0ptr: zds 2
buf0len: zds 1
buf1ptr: zds 2
buf1len: zds 1
buf2ptr: zds 2
buf2len: zds 1
buf3ptr: zds 2
buf3len: zds 1
;---------------------------------------------------------------------
; Read the ASCII hex two's complement representation of an integer
; and convert it to a bigint. The first bit is the sign, i.e.,
; `FF00` will be read as -256 decimal; a minus prefix is not
; allowed.
;
; X, Y: (d)
; A: (d) buf0ptr length; 1 <= A <= 255
; buf0ptr: (d) pointer to input char buffer
; [buf0ptr]: (p) input chars
; buf1ptr: (d) output buffer, must be length (A+1)/2.
; [buf1ptr]: (w) bigint output
;
bi_readhex ldx #0 ; constant for indirect addressing
; Start by skipping past any leading '0's.
tay ; save length
.stripl0 lda (buf0ptr,x)
cmp #'0'
bne .conv ; no (more) leading zeros; start conversion
dey ; reduce length
beq .conv0 ; but if it's last digit, convert the 0 anyway
incw buf0ptr ; move past leading 0
clc
bcc .stripl0
; Conversion, two input digits at a time
.conv0 iny
.conv tya ; copy input length to A
dey ; length -1 = last byte of input buffer
clc
ror ; output length is 1/2 input length
adc #0 ; plus 1 if input length is odd
sta (buf1ptr,x) ; store output length
adc buf1ptr ; set buf1ptr to end of buffer
sta buf1ptr
bcc .nextbyte
inc buf1ptr+1
.nextbyte lda (buf0ptr),y ; first char for this byte of output buffer
jsr qdigit ; convert ASCII digit to binary
bmi .err ; bad digit, error
cmp #$10
bcs .err ; >=16, error
sta (buf1ptr,x)
dey ; second char for this byte of output buffer
bmi .done ; if input done, return success
lda (buf0ptr),y
jsr qdigit ; convert ASCII digit to binary
bmi .err ; bad digit, error
cmp #$10
bcs .err ; >=16, error
asl ; shift up to high nybble
asl
asl
asl
clc
adc (buf1ptr,x)
sta (buf1ptr,x)
dec buf1ptr
lda buf1ptr
cmp #$FF
bne .nocarry1
dec buf1ptr+1
.nocarry1 dey
cpy #$FF
bne .nextbyte ; next chars
.done rts ; success; return
.err brk ; readhex error
;---------------------------------------------------------------------
; In-place multiply by ten of a big-endian integer. There is no check for
; overflow, so the value must be at least ten times smaller than the
; maximum value that can fit in the buffer. A good heurstic is that the
; most significant five bits (for signed integers, four bits for
; unsigned) should be all zero or all one; any such value will be very
; close to the maximum/minimum value that will not overflow.
;
; Like the 6502 ADC/SBC instructions, this does not care whether you are
; multiplying a signed or unsigned value; you decide which way you want
; to treat it. This allows you an extra bit of precision if you have
; separately kept track of the sign of the input; you can safely overflow
; the sign bit out of the MSB and sign-extend afterwards. For example,
; input [$FF, $00] results in [$30, $30]. If you were considering your
; input to be unsigned (65280₁₀) it overflowed and the result is
; incorrect. But if you were considering your input to be signed
; (-5320₁₀) only the sign bit "overflowed"; you know the output is
; negative and can simply sign-extend extend to $FF3030 to have the
; correct result.
;
; This requires a scratch buffer of the same length as the number to be
; multiplied.
;
; A, Y: (d)
; X: (p)
; buf0len: (p) [buf0ptr₁] length; 1 <= buf0len <= 255
; buf0ptr₁: (pw) pointer to input/output buffer address - 1
; bufSptr₁: (pd) pointer to scratch buffer address - 1
;
; XXX This is often called with small numbers with lots of leading
; zero bytes. Thus, it might be a significant optimization for
; positive numbers to work on only the right-hand portion of the
; buffer from the rightmost zero byte to the end. But for negative
; numbers to use the same technique they would need to be filled
; with $FF sign extension bytes or something like that. It's not
; clear if detection and setting of that should be done by this
; routine or by the caller.
;
; This takes about 96 cycles per byte, with 6-12 cycles of overhead. A
; full 256 byte input takes 24741 cycles. The majority of this, sadly, is
; the LDA/STA because ROL has no indirect indexed addressing mode. This
; could be worked around with self-modifying code, but currently it's not
; worth the effort.
;
bi_x10 ldy buf0len
; Multiply the buffer by 2, also making a copy for later addition
clc
- lda (buf0ptr),y
rol
sta (buf0ptr),y
sta (bufSptr),y
dey
bne -
; Multiply buffer by 2 again, for ×4
ldy buf0len
clc
- lda (buf0ptr),y
rol
sta (buf0ptr),y
dey
bne -
; Multiply buffer by 2 again, for ×8
ldy buf0len
clc
- lda (buf0ptr),y
rol
sta (buf0ptr),y
dey
bne -
; Add the ×2 value, for ×10
ldy buf0len
clc
- lda (bufSptr),y
adc (buf0ptr),y
sta (buf0ptr),y
dey
bne -
rts
; Read the ASCII decimal representation of a positive or negative integer
; and convert it to a non-normalized (i.e., may have leading sign bytes
; as the MSBs) bigint. Leading zeros are processed (though obviously with
; no effect other than slowing the conversion).
;
; No error checking is done; ouput/beahviour is undefined unless the
; following requirements are met.
; • 1 <= length(input) <= 255
; • All input characters are ASCII digits
; • sign = $00 or $FF
; • Output and scratch buffers are large enough to hold the output.
;
; (The output/scratch buffers need not be large enough to hold a valid
; sign; the sign is always what is specified as the sign input,
; regardless of the value of the most significant bit. If the MSB differs
; from the sign you will need to sign-extend the output with a sign byte
; to have a valid number with the sign in it.)
;
; A reasonable heuristic for the scratch/output buffer sizes is half the
; number of digits in the input, rounded up. In the worst case this will
; be about 20% larger than necessary, but for up to 22 digit inputs the
; worst case is only two bytes larger.
;
; buf1ptr₀: (pp) input chars buffer address
; buf1len : (p) input buffer length: 1 <= buf1len <= 255
; sign: (p)
; buf0ptr₁: (pw) pointer to output buffer address - 1; length as above
; buf0len : (p) output buffer length; see conditions above
; bufSptr₁: (pd) scratch buffer address - 1; length same as buf0ptr₀
; curdigit : (d) temporary storage
; A, Y: (d)
; X: (p)
;
; XXX Check the carry to see if there's anything to propagate,
; and shortcut if there isn't?
;
bi_read_decdigits:
; Set output buffer to starting value of zero.
lda #0
ldy buf0len
.clearbuf sta (buf0ptr),y
dey
bne .clearbuf
sta curdigit ; start with most signficant digit (at pos. 0)
beq .skip10x ; BRA; skip initial multiply of zeroed buffer
.digit ; Multiply current intermediate value (buf0ptr) by ten.
jsr bi_x10
.skip10x ; Add in or subtract the current digit.
ldy curdigit
inc curdigit ; for next loop iteration
lda (buf1ptr),y ; load this digit
eor #$30 ; convert ASCII to binary
ldy buf0len ; output LSB index for following add/sub code
bit sign ; load high bit of sign into N flag
bmi .subdigit ; if negative, go subtract
; Add binary value of digit in A into output
.adddigit clc
adc (buf0ptr),y ; add LSB to new digit
sta (buf0ptr),y ; and store new LSB
.addnext dey ; another byte?
beq .nextdigit ; no, carry propagation complete
lda (buf0ptr),y
adc #0 ; propagate carry
sta (buf0ptr),y
clv
bvc .addnext ; BRA
.subdigit ; Subtract binary value of digit from output
sta temp1 ; subtrahend must be in memory
lda (buf0ptr),y ; load current LSB
sec
sbc temp1 ; calculate new LSB
sta (buf0ptr),y ; and store it
.subnext dey ; propagation done?
beq .nextdigit ; yes, move on to next digit
lda (buf0ptr),y ; propagate carry
sbc #0
sta (buf0ptr),y
clv
bvc .subnext ; BRA; continue propagation
.nextdigit ; Propagation complete; on to next digit
lda curdigit ; digits remaining
cmp buf1len
beq .done ; none, we're done
clv
bvc .digit ; BRA; continue with next digit
.done rts
curdigit zds 1 ; temporary storage
; Read the ASCII signed decimal representation of an integer and convert
; it to a bigint. The first character may be a optional `+` or `-` sign;
; all other characters must be ASCII decimal digits. No error checking of
; input characters is done; bad input produces an undefined result.
;
; The temporary buffers pointed to by buf0ptr and bufSptr should be half
; the length of the input rounded up plus one byte , with a minimum
; length of 2. (XXX These should probably be allocated and freed by this
; routine.)
;
; The ouput buffer will be a 1-byte count of the length followed by that
; many bytes. XXX figure out how to handle length allocation of this.
; Probably also allocated by the routine.
;
; Y: (d) [buf1ptr] (input) length; 1 <= A <= 255
; bufSptr₁: (pd) scratch buffer addr - 1; for length see above
; buf0ptr₁: (dd) temp buffer addr - 1; for length see above
; buf1ptr₀: (dp) pointer to start of input chars
; buf2ptr₀: (dw) pointer to output buffer for len+value
;
; A,X: (d)
; buf1len : (d)
;
bi_read_dec
ldx #0 ; constant for indirect addressing, etc.
; Check for and process a `+` or `-` prefix
stx sign ; assume positive
lda (buf1ptr,x)
cmp #'-' ; starts with '-' sign?
bne .checkplus ; no, check for other things
dec sign ; change sign to negative
bmi .dropsign ; BRA; remove sign char
.checkplus cmp #'+' ; starts with a '+' sign?
bne .skipzeros ; no, skip sign char removal
.dropsign ; Remove the sign char from the input
dey ; dec [buf1ptr₀] (input) length
incw buf1ptr ; move input start forward one char
.skipzeros ; Skip past leading zeros (except last char, of course)
clv ; for later BRA
.skipzero cpy #1 ; on last char of input?
beq .onedigit ; yes, special case for single-digit conversion
lda (buf1ptr,x)
cmp #'0' ; leading zero?
bne .convert ; no, start conversion
dey ; otherwise dec length
incw buf1ptr ; and move input ptr forward past the '0'
bvc .skipzero ; BRA
.onedigit ; We have only one char of input left. We special-case 0 because
; it breaks our leading-zero-digit removal during later
; normalization. Also, it may be a common case so fast is nice.
lda (buf1ptr,x)
cmp #'0'
bne .convert ; not zero, continue with standard conversion
ldy #0 ; output length byte location
lda #1 ; output length byte value
sta (buf2ptr),y
tya ; output value (0) to A
iny ; output value location = 1
sta (buf2ptr),y
rts
.convert ; ASCII to binary conversion by bi_read_decdigits.
; buf1ptr₁ already points to start of input
sty buf1len ; set remaining input length
tya ; calculate the temp/scratch buffer length
clc ; ...
ror a ; divide by two, leaving old LSbit in carry
adc #0 ; round up
sta buf0len ; set output buf size
jsr bi_read_decdigits
; Normalize by dropping redundant leading sign bytes
clv ; for later BRA
ldy #1 ; [buf0ptr₁] uses 1-based indexing
.normalize lda (buf0ptr),y
cmp sign
bne .done ; not a sign extension byte, we're normalized
iny ; check following byte
lda (buf0ptr),y
eor sign ; next char's sign bit same as sign?
bmi .done ; no, we're done with normalization, we must
; keep sign extension byte to preserve sign
dey ; set Y back to start of buf for next iteration
dec buf0len ; drop this byte by reducing length by one
incw buf0ptr ; and moving start point up
; XXX no test case covers this increment
; not being 16-bit
bvc .normalize ; BRA
.done ; Produce final result in [buf2ptr₁] with 1-based indexing.
lda buf0len ; Store length as
ldy #0 ; first byte of
sta (buf2ptr ),y ; [buf2ptr₀]
; Copy normalized value to output buffer
ldy buf0len
- lda (buf0ptr),y
sta (buf2ptr),y ; assume output len is that of bi_read_udec
dey
bne -
rts