/
modnscalar.go
1088 lines (989 loc) · 42.3 KB
/
modnscalar.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) 2020 The Decred developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.
package secp256k1
import (
"encoding/hex"
"math/big"
)
// References:
// [SECG]: Recommended Elliptic Curve Domain Parameters
// https://www.secg.org/sec2-v2.pdf
//
// [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
// http://cacr.uwaterloo.ca/hac/
// Many elliptic curve operations require working with scalars in a finite field
// characterized by the order of the group underlying the secp256k1 curve.
// Given this precision is larger than the biggest available native type,
// obviously some form of bignum math is needed. This code implements
// specialized fixed-precision field arithmetic rather than relying on an
// arbitrary-precision arithmetic package such as math/big for dealing with the
// math modulo the group order since the size is known. As a result, rather
// large performance gains are achieved by taking advantage of many
// optimizations not available to arbitrary-precision arithmetic and generic
// modular arithmetic algorithms.
//
// There are various ways to internally represent each element. For example,
// the most obvious representation would be to use an array of 4 uint64s (64
// bits * 4 = 256 bits). However, that representation suffers from the fact
// that there is no native Go type large enough to handle the intermediate
// results while adding or multiplying two 64-bit numbers.
//
// Given the above, this implementation represents the field elements as 8
// uint32s with each word (array entry) treated as base 2^32. This was chosen
// because most systems at the current time are 64-bit (or at least have 64-bit
// registers available for specialized purposes such as MMX) so the intermediate
// results can typically be done using a native register (and using uint64s to
// avoid the need for additional half-word arithmetic)
const (
// These fields provide convenient access to each of the words of the
// secp256k1 curve group order N to improve code readability.
//
// The group order of the curve per [SECG] is:
// 0xffffffff ffffffff ffffffff fffffffe baaedce6 af48a03b bfd25e8c d0364141
orderWordZero uint32 = 0xd0364141
orderWordOne uint32 = 0xbfd25e8c
orderWordTwo uint32 = 0xaf48a03b
orderWordThree uint32 = 0xbaaedce6
orderWordFour uint32 = 0xfffffffe
orderWordFive uint32 = 0xffffffff
orderWordSix uint32 = 0xffffffff
orderWordSeven uint32 = 0xffffffff
// These fields provide convenient access to each of the words of the two's
// complement of the secp256k1 curve group order N to improve code
// readability.
//
// The two's complement of the group order is:
// 0x00000000 00000000 00000000 00000001 45512319 50b75fc4 402da173 2fc9bebf
orderComplementWordZero uint32 = (^orderWordZero) + 1
orderComplementWordOne uint32 = ^orderWordOne
orderComplementWordTwo uint32 = ^orderWordTwo
orderComplementWordThree uint32 = ^orderWordThree
//orderComplementWordFour uint32 = ^orderWordFour // unused
//orderComplementWordFive uint32 = ^orderWordFive // unused
//orderComplementWordSix uint32 = ^orderWordSix // unused
//orderComplementWordSeven uint32 = ^orderWordSeven // unused
// These fields provide convenient access to each of the words of the
// secp256k1 curve group order N / 2 to improve code readability and avoid
// the need to recalculate them.
//
// The half order of the secp256k1 curve group is:
// 0x7fffffff ffffffff ffffffff ffffffff 5d576e73 57a4501d dfe92f46 681b20a0
halfOrderWordZero uint32 = 0x681b20a0
halfOrderWordOne uint32 = 0xdfe92f46
halfOrderWordTwo uint32 = 0x57a4501d
halfOrderWordThree uint32 = 0x5d576e73
halfOrderWordFour uint32 = 0xffffffff
halfOrderWordFive uint32 = 0xffffffff
halfOrderWordSix uint32 = 0xffffffff
halfOrderWordSeven uint32 = 0x7fffffff
// uint32Mask is simply a mask with all bits set for a uint32 and is used to
// improve the readability of the code.
uint32Mask = 0xffffffff
)
var (
// zero32 is an array of 32 bytes used for the purposes of zeroing and is
// defined here to avoid extra allocations.
zero32 = [32]byte{}
)
// ModNScalar implements optimized 256-bit constant-time fixed-precision
// arithmetic over the secp256k1 group order. This means all arithmetic is
// performed modulo:
//
// 0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141
//
// It only implements the arithmetic needed for elliptic curve operations,
// however, the operations that are not implemented can typically be worked
// around if absolutely needed. For example, subtraction can be performed by
// adding the negation.
//
// Should it be absolutely necessary, conversion to the standard library
// math/big.Int can be accomplished by using the Bytes method, slicing the
// resulting fixed-size array, and feeding it to big.Int.SetBytes. However,
// that should typically be avoided when possible as conversion to big.Ints
// requires allocations, is not constant time, and is slower when working modulo
// the group order.
type ModNScalar struct {
// The scalar is represented as 8 32-bit integers in base 2^32.
//
// The following depicts the internal representation:
// ---------------------------------------------------------
// | n[7] | n[6] | ... | n[0] |
// | 32 bits | 32 bits | ... | 32 bits |
// | Mult: 2^(32*7) | Mult: 2^(32*6) | ... | Mult: 2^(32*0) |
// ---------------------------------------------------------
//
// For example, consider the number 2^87 + 2^42 + 1. It would be
// represented as:
// n[0] = 1
// n[1] = 2^10
// n[2] = 2^23
// n[3..7] = 0
//
// The full 256-bit value is then calculated by looping i from 7..0 and
// doing sum(n[i] * 2^(32i)) like so:
// n[7] * 2^(32*7) = 0 * 2^224 = 0
// n[6] * 2^(32*6) = 0 * 2^192 = 0
// ...
// n[2] * 2^(32*2) = 2^23 * 2^64 = 2^87
// n[1] * 2^(32*1) = 2^10 * 2^32 = 2^42
// n[0] * 2^(32*0) = 1 * 2^0 = 1
// Sum: 0 + 0 + ... + 2^87 + 2^42 + 1 = 2^87 + 2^42 + 1
n [8]uint32
}
// String returns the scalar as a human-readable hex string.
//
// This is NOT constant time.
func (s ModNScalar) String() string {
b := s.Bytes()
return hex.EncodeToString(b[:])
}
// Set sets the scalar equal to a copy of the passed one in constant time.
//
// The scalar is returned to support chaining. This enables syntax like:
// s := new(ModNScalar).Set(s2).Add(1) so that s = s2 + 1 where s2 is not
// modified.
func (s *ModNScalar) Set(val *ModNScalar) *ModNScalar {
*s = *val
return s
}
// Zero sets the scalar to zero in constant time. A newly created scalar is
// already set to zero. This function can be useful to clear an existing scalar
// for reuse.
func (s *ModNScalar) Zero() {
s.n[0] = 0
s.n[1] = 0
s.n[2] = 0
s.n[3] = 0
s.n[4] = 0
s.n[5] = 0
s.n[6] = 0
s.n[7] = 0
}
// IsZero returns whether or not the scalar is equal to zero in constant time.
func (s *ModNScalar) IsZero() bool {
// The scalar can only be zero if no bits are set in any of the words.
bits := s.n[0] | s.n[1] | s.n[2] | s.n[3] | s.n[4] | s.n[5] | s.n[6] | s.n[7]
return bits == 0
}
// SetInt sets the scalar to the passed integer in constant time. This is a
// convenience function since it is fairly common to perform some arithmetic
// with small native integers.
//
// The scalar is returned to support chaining. This enables syntax like:
// s := new(ModNScalar).SetInt(2).Mul(s2) so that s = 2 * s2.
func (s *ModNScalar) SetInt(ui uint32) *ModNScalar {
s.Zero()
s.n[0] = ui
return s
}
// constantTimeEq returns 1 if a == b or 0 otherwise in constant time.
func constantTimeEq(a, b uint32) uint32 {
return uint32((uint64(a^b) - 1) >> 63)
}
// constantTimeNotEq returns 1 if a != b or 0 otherwise in constant time.
func constantTimeNotEq(a, b uint32) uint32 {
return ^uint32((uint64(a^b)-1)>>63) & 1
}
// constantTimeLess returns 1 if a < b or 0 otherwise in constant time.
func constantTimeLess(a, b uint32) uint32 {
return uint32((uint64(a) - uint64(b)) >> 63)
}
// constantTimeLessOrEq returns 1 if a <= b or 0 otherwise in constant time.
func constantTimeLessOrEq(a, b uint32) uint32 {
return uint32((uint64(a) - uint64(b) - 1) >> 63)
}
// constantTimeGreater returns 1 if a > b or 0 otherwise in constant time.
func constantTimeGreater(a, b uint32) uint32 {
return constantTimeLess(b, a)
}
// constantTimeGreaterOrEq returns 1 if a >= b or 0 otherwise in constant time.
func constantTimeGreaterOrEq(a, b uint32) uint32 {
return constantTimeLessOrEq(b, a)
}
// constantTimeMin returns min(a,b) in constant time.
func constantTimeMin(a, b uint32) uint32 {
return b ^ ((a ^ b) & -constantTimeLess(a, b))
}
// overflows determines if the current scalar is greater than or equal to the
// group order in constant time and returns 1 if it is or 0 otherwise.
func (s *ModNScalar) overflows() uint32 {
// The intuition here is that the scalar is greater than the group order if
// one of the higher individual words is greater than corresponding word of
// the group order and all higher words in the scalar are equal to their
// corresponding word of the group order. Since this type is modulo the
// group order, being equal is also an overflow back to 0.
//
// Note that the words 5, 6, and 7 are all the max uint32 value, so there is
// no need to test if those individual words of the scalar exceeds them,
// hence, only equality is checked for them.
highWordsEqual := constantTimeEq(s.n[7], orderWordSeven)
highWordsEqual &= constantTimeEq(s.n[6], orderWordSix)
highWordsEqual &= constantTimeEq(s.n[5], orderWordFive)
overflow := highWordsEqual & constantTimeGreater(s.n[4], orderWordFour)
highWordsEqual &= constantTimeEq(s.n[4], orderWordFour)
overflow |= highWordsEqual & constantTimeGreater(s.n[3], orderWordThree)
highWordsEqual &= constantTimeEq(s.n[3], orderWordThree)
overflow |= highWordsEqual & constantTimeGreater(s.n[2], orderWordTwo)
highWordsEqual &= constantTimeEq(s.n[2], orderWordTwo)
overflow |= highWordsEqual & constantTimeGreater(s.n[1], orderWordOne)
highWordsEqual &= constantTimeEq(s.n[1], orderWordOne)
overflow |= highWordsEqual & constantTimeGreaterOrEq(s.n[0], orderWordZero)
return overflow
}
// reduce256 reduces the current scalar modulo the group order in accordance
// with the overflows parameter in constant time. The overflows parameter
// specifies whether or not the scalar is known to be greater than the group
// order and MUST either be 1 in the case it is or 0 in the case it is not for a
// correct result.
func (s *ModNScalar) reduce256(overflows uint32) {
// Notice that since s < 2^256 < 2N (where N is the group order), the max
// possible number of reductions required is one. Therefore, in the case a
// reduction is needed, it can be performed with a single subtraction of N.
// Also, recall that subtraction is equivalent to addition by the two's
// complement while ignoring the carry.
//
// When s >= N, the overflows parameter will be 1. Conversely, it will be 0
// when s < N. Thus multiplying by the overflows parameter will either
// result in 0 or the multiplicand itself.
//
// Combining the above along with the fact that s + 0 = s, the following is
// a constant time implementation that works by either adding 0 or the two's
// complement of N as needed.
//
// The final result will be in the range 0 <= s < N as expected.
overflows64 := uint64(overflows)
c := uint64(s.n[0]) + overflows64*uint64(orderComplementWordZero)
s.n[0] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(s.n[1]) + overflows64*uint64(orderComplementWordOne)
s.n[1] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(s.n[2]) + overflows64*uint64(orderComplementWordTwo)
s.n[2] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(s.n[3]) + overflows64*uint64(orderComplementWordThree)
s.n[3] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(s.n[4]) + overflows64 // * 1
s.n[4] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(s.n[5]) // + overflows64 * 0
s.n[5] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(s.n[6]) // + overflows64 * 0
s.n[6] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(s.n[7]) // + overflows64 * 0
s.n[7] = uint32(c & uint32Mask)
}
// SetBytes interprets the provided array as a 256-bit big-endian unsigned
// integer, reduces it modulo the group order, sets the scalar to the result,
// and returns either 1 if it was reduced (aka it overflowed) or 0 otherwise in
// constant time.
//
// Note that a bool is not used here because it is not possible in Go to convert
// from a bool to numeric value in constant time and many constant-time
// operations require a numeric value.
func (s *ModNScalar) SetBytes(b *[32]byte) uint32 {
// Pack the 256 total bits across the 8 uint32 words. This could be done
// with a for loop, but benchmarks show this unrolled version is about 2
// times faster than the variant that uses a loop.
s.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 | uint32(b[28])<<24
s.n[1] = uint32(b[27]) | uint32(b[26])<<8 | uint32(b[25])<<16 | uint32(b[24])<<24
s.n[2] = uint32(b[23]) | uint32(b[22])<<8 | uint32(b[21])<<16 | uint32(b[20])<<24
s.n[3] = uint32(b[19]) | uint32(b[18])<<8 | uint32(b[17])<<16 | uint32(b[16])<<24
s.n[4] = uint32(b[15]) | uint32(b[14])<<8 | uint32(b[13])<<16 | uint32(b[12])<<24
s.n[5] = uint32(b[11]) | uint32(b[10])<<8 | uint32(b[9])<<16 | uint32(b[8])<<24
s.n[6] = uint32(b[7]) | uint32(b[6])<<8 | uint32(b[5])<<16 | uint32(b[4])<<24
s.n[7] = uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
// The value might be >= N, so reduce it as required and return whether or
// not it was reduced.
needsReduce := s.overflows()
s.reduce256(needsReduce)
return needsReduce
}
// zeroArray32 zeroes the provided 32-byte buffer.
func zeroArray32(b *[32]byte) {
copy(b[:], zero32[:])
}
// SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
// integer (meaning it is truncated to the first 32 bytes), reduces it modulo
// the group order, sets the scalar to the result, and returns whether or not
// the resulting truncated 256-bit integer overflowed in constant time.
//
// Note that since passing a slice with more than 32 bytes is truncated, it is
// possible that the truncated value is less than the order of the curve and
// hence it will not be reported as having overflowed in that case. It is up to
// the caller to decide whether it needs to provide numbers of the appropriate
// size or it is acceptable to use this function with the described truncation
// and overflow behavior.
func (s *ModNScalar) SetByteSlice(b []byte) bool {
var b32 [32]byte
b = b[:constantTimeMin(uint32(len(b)), 32)]
copy(b32[:], b32[:32-len(b)])
copy(b32[32-len(b):], b)
result := s.SetBytes(&b32)
zeroArray32(&b32)
return result != 0
}
// PutBytesUnchecked unpacks the scalar to a 32-byte big-endian value directly
// into the passed byte slice in constant time. The target slice must must have
// at least 32 bytes available or it will panic.
//
// There is a similar function, PutBytes, which unpacks the scalar into a
// 32-byte array directly. This version is provided since it can be useful to
// write directly into part of a larger buffer without needing a separate
// allocation.
//
// Preconditions:
// - The target slice MUST have at least 32 bytes available
func (s *ModNScalar) PutBytesUnchecked(b []byte) {
// Unpack the 256 total bits from the 8 uint32 words. This could be done
// with a for loop, but benchmarks show this unrolled version is about 2
// times faster than the variant which uses a loop.
b[31] = byte(s.n[0])
b[30] = byte(s.n[0] >> 8)
b[29] = byte(s.n[0] >> 16)
b[28] = byte(s.n[0] >> 24)
b[27] = byte(s.n[1])
b[26] = byte(s.n[1] >> 8)
b[25] = byte(s.n[1] >> 16)
b[24] = byte(s.n[1] >> 24)
b[23] = byte(s.n[2])
b[22] = byte(s.n[2] >> 8)
b[21] = byte(s.n[2] >> 16)
b[20] = byte(s.n[2] >> 24)
b[19] = byte(s.n[3])
b[18] = byte(s.n[3] >> 8)
b[17] = byte(s.n[3] >> 16)
b[16] = byte(s.n[3] >> 24)
b[15] = byte(s.n[4])
b[14] = byte(s.n[4] >> 8)
b[13] = byte(s.n[4] >> 16)
b[12] = byte(s.n[4] >> 24)
b[11] = byte(s.n[5])
b[10] = byte(s.n[5] >> 8)
b[9] = byte(s.n[5] >> 16)
b[8] = byte(s.n[5] >> 24)
b[7] = byte(s.n[6])
b[6] = byte(s.n[6] >> 8)
b[5] = byte(s.n[6] >> 16)
b[4] = byte(s.n[6] >> 24)
b[3] = byte(s.n[7])
b[2] = byte(s.n[7] >> 8)
b[1] = byte(s.n[7] >> 16)
b[0] = byte(s.n[7] >> 24)
}
// PutBytes unpacks the scalar to a 32-byte big-endian value using the passed
// byte array in constant time.
//
// There is a similar function, PutBytesUnchecked, which unpacks the scalar into
// a slice that must have at least 32 bytes available. This version is provided
// since it can be useful to write directly into an array that is type checked.
//
// Alternatively, there is also Bytes, which unpacks the scalar into a new array
// and returns that which can sometimes be more ergonomic in applications that
// aren't concerned about an additional copy.
func (s *ModNScalar) PutBytes(b *[32]byte) {
s.PutBytesUnchecked(b[:])
}
// Bytes unpacks the scalar to a 32-byte big-endian value in constant time.
//
// See PutBytes and PutBytesUnchecked for variants that allow an array or slice
// to be passed which can be useful to cut down on the number of allocations
// by allowing the caller to reuse a buffer or write directly into part of a
// larger buffer.
func (s *ModNScalar) Bytes() [32]byte {
var b [32]byte
s.PutBytesUnchecked(b[:])
return b
}
// IsOdd returns whether or not the scalar is an odd number in constant time.
func (s *ModNScalar) IsOdd() bool {
// Only odd numbers have the bottom bit set.
return s.n[0]&1 == 1
}
// Equals returns whether or not the two scalars are the same in constant time.
func (s *ModNScalar) Equals(val *ModNScalar) bool {
// Xor only sets bits when they are different, so the two scalars can only
// be the same if no bits are set after xoring each word.
bits := (s.n[0] ^ val.n[0]) | (s.n[1] ^ val.n[1]) | (s.n[2] ^ val.n[2]) |
(s.n[3] ^ val.n[3]) | (s.n[4] ^ val.n[4]) | (s.n[5] ^ val.n[5]) |
(s.n[6] ^ val.n[6]) | (s.n[7] ^ val.n[7])
return bits == 0
}
// Add2 adds the passed two scalars together modulo the group order in constant
// time and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s3.Add2(s, s2).AddInt(1) so that s3 = s + s2 + 1.
func (s *ModNScalar) Add2(val1, val2 *ModNScalar) *ModNScalar {
c := uint64(val1.n[0]) + uint64(val2.n[0])
s.n[0] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(val1.n[1]) + uint64(val2.n[1])
s.n[1] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(val1.n[2]) + uint64(val2.n[2])
s.n[2] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(val1.n[3]) + uint64(val2.n[3])
s.n[3] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(val1.n[4]) + uint64(val2.n[4])
s.n[4] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(val1.n[5]) + uint64(val2.n[5])
s.n[5] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(val1.n[6]) + uint64(val2.n[6])
s.n[6] = uint32(c & uint32Mask)
c = (c >> 32) + uint64(val1.n[7]) + uint64(val2.n[7])
s.n[7] = uint32(c & uint32Mask)
// The result is now 256 bits, but it might still be >= N, so use the
// existing normal reduce method for 256-bit values.
s.reduce256(uint32(c>>32) + s.overflows())
return s
}
// Add adds the passed scalar to the existing one modulo the group order in
// constant time and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.Add(s2).AddInt(1) so that s = s + s2 + 1.
func (s *ModNScalar) Add(val *ModNScalar) *ModNScalar {
return s.Add2(s, val)
}
// accumulator96 provides a 96-bit accumulator for use in the intermediate
// calculations requiring more than 64-bits.
type accumulator96 struct {
n [3]uint32
}
// Add adds the passed unsigned 64-bit value to the accumulator.
func (a *accumulator96) Add(v uint64) {
low := uint32(v & uint32Mask)
hi := uint32(v >> 32)
a.n[0] += low
hi += constantTimeLess(a.n[0], low) // Carry if overflow in n[0].
a.n[1] += hi
a.n[2] += constantTimeLess(a.n[1], hi) // Carry if overflow in n[1].
}
// Rsh32 right shifts the accumulator by 32 bits.
func (a *accumulator96) Rsh32() {
a.n[0] = a.n[1]
a.n[1] = a.n[2]
a.n[2] = 0
}
// reduce385 reduces the 385-bit intermediate result in the passed terms modulo
// the group order in constant time and stores the result in s.
func (s *ModNScalar) reduce385(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12 uint64) {
// At this point, the intermediate result in the passed terms has been
// reduced to fit within 385 bits, so reduce it again using the same method
// described in reduce512. As before, the intermediate result will end up
// being reduced by another 127 bits to 258 bits, thus 9 32-bit terms are
// needed for this iteration. The reduced terms are assigned back to t0
// through t8.
//
// Note that several of the intermediate calculations require adding 64-bit
// products together which would overflow a uint64, so a 96-bit accumulator
// is used instead until the value is reduced enough to use native uint64s.
// Terms for 2^(32*0).
var acc accumulator96
acc.n[0] = uint32(t0) // == acc.Add(t0) because acc is guaranteed to be 0.
acc.Add(t8 * uint64(orderComplementWordZero))
t0 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*1).
acc.Add(t1)
acc.Add(t8 * uint64(orderComplementWordOne))
acc.Add(t9 * uint64(orderComplementWordZero))
t1 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*2).
acc.Add(t2)
acc.Add(t8 * uint64(orderComplementWordTwo))
acc.Add(t9 * uint64(orderComplementWordOne))
acc.Add(t10 * uint64(orderComplementWordZero))
t2 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*3).
acc.Add(t3)
acc.Add(t8 * uint64(orderComplementWordThree))
acc.Add(t9 * uint64(orderComplementWordTwo))
acc.Add(t10 * uint64(orderComplementWordOne))
acc.Add(t11 * uint64(orderComplementWordZero))
t3 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*4).
acc.Add(t4)
acc.Add(t8) // * uint64(orderComplementWordFour) // * 1
acc.Add(t9 * uint64(orderComplementWordThree))
acc.Add(t10 * uint64(orderComplementWordTwo))
acc.Add(t11 * uint64(orderComplementWordOne))
acc.Add(t12 * uint64(orderComplementWordZero))
t4 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*5).
acc.Add(t5)
// acc.Add(t8 * uint64(orderComplementWordFive)) // 0
acc.Add(t9) // * uint64(orderComplementWordFour) // * 1
acc.Add(t10 * uint64(orderComplementWordThree))
acc.Add(t11 * uint64(orderComplementWordTwo))
acc.Add(t12 * uint64(orderComplementWordOne))
t5 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*6).
acc.Add(t6)
// acc.Add(t8 * uint64(orderComplementWordSix)) // 0
// acc.Add(t9 * uint64(orderComplementWordFive)) // 0
acc.Add(t10) // * uint64(orderComplementWordFour) // * 1
acc.Add(t11 * uint64(orderComplementWordThree))
acc.Add(t12 * uint64(orderComplementWordTwo))
t6 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*7).
acc.Add(t7)
// acc.Add(t8 * uint64(orderComplementWordSeven)) // 0
// acc.Add(t9 * uint64(orderComplementWordSix)) // 0
// acc.Add(t10 * uint64(orderComplementWordFive)) // 0
acc.Add(t11) // * uint64(orderComplementWordFour) // * 1
acc.Add(t12 * uint64(orderComplementWordThree))
t7 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*8).
// acc.Add(t9 * uint64(orderComplementWordSeven)) // 0
// acc.Add(t10 * uint64(orderComplementWordSix)) // 0
// acc.Add(t11 * uint64(orderComplementWordFive)) // 0
acc.Add(t12) // * uint64(orderComplementWordFour) // * 1
t8 = uint64(acc.n[0])
// acc.Rsh32() // No need since not used after this. Guaranteed to be 0.
// NOTE: All of the remaining multiplications for this iteration result in 0
// as they all involve multiplying by combinations of the fifth, sixth, and
// seventh words of the two's complement of N, which are 0, so skip them.
// At this point, the result is reduced to fit within 258 bits, so reduce it
// again using a slightly modified version of the same method. The maximum
// value in t8 is 2 at this point and therefore multiplying it by each word
// of the two's complement of N and adding it to a 32-bit term will result
// in a maximum requirement of 33 bits, so it is safe to use native uint64s
// here for the intermediate term carry propagation.
//
// Also, since the maximum value in t8 is 2, this ends up reducing by
// another 2 bits to 256 bits.
c := t0 + t8*uint64(orderComplementWordZero)
s.n[0] = uint32(c & uint32Mask)
c = (c >> 32) + t1 + t8*uint64(orderComplementWordOne)
s.n[1] = uint32(c & uint32Mask)
c = (c >> 32) + t2 + t8*uint64(orderComplementWordTwo)
s.n[2] = uint32(c & uint32Mask)
c = (c >> 32) + t3 + t8*uint64(orderComplementWordThree)
s.n[3] = uint32(c & uint32Mask)
c = (c >> 32) + t4 + t8 // * uint64(orderComplementWordFour) == * 1
s.n[4] = uint32(c & uint32Mask)
c = (c >> 32) + t5 // + t8*uint64(orderComplementWordFive) == 0
s.n[5] = uint32(c & uint32Mask)
c = (c >> 32) + t6 // + t8*uint64(orderComplementWordSix) == 0
s.n[6] = uint32(c & uint32Mask)
c = (c >> 32) + t7 // + t8*uint64(orderComplementWordSeven) == 0
s.n[7] = uint32(c & uint32Mask)
// The result is now 256 bits, but it might still be >= N, so use the
// existing normal reduce method for 256-bit values.
s.reduce256(uint32(c>>32) + s.overflows())
}
// reduce512 reduces the 512-bit intermediate result in the passed terms modulo
// the group order down to 385 bits in constant time and stores the result in s.
func (s *ModNScalar) reduce512(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15 uint64) {
// At this point, the intermediate result in the passed terms is grouped
// into the respective bases.
//
// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
// when the modulus is of the special form m = b^t - c, where log_2(c) < t,
// highly efficient reduction can be achieved per the provided algorithm.
//
// The secp256k1 group order fits this criteria since it is:
// 2^256 - 432420386565659656852420866394968145599
//
// Technically the max possible value here is (N-1)^2 since the two scalars
// being multiplied are always mod N. Nevertheless, it is safer to consider
// it to be (2^256-1)^2 = 2^512 - 2^256 + 1 since it is the product of two
// 256-bit values.
//
// The algorithm is to reduce the result modulo the prime by subtracting
// multiples of the group order N. However, in order simplify carry
// propagation, this adds with the two's complement of N to achieve the same
// result.
//
// Since the two's complement of N has 127 leading zero bits, this will end
// up reducing the intermediate result from 512 bits to 385 bits, resulting
// in 13 32-bit terms. The reduced terms are assigned back to t0 through
// t12.
//
// Note that several of the intermediate calculations require adding 64-bit
// products together which would overflow a uint64, so a 96-bit accumulator
// is used instead.
// Terms for 2^(32*0).
var acc accumulator96
acc.n[0] = uint32(t0) // == acc.Add(t0) because acc is guaranteed to be 0.
acc.Add(t8 * uint64(orderComplementWordZero))
t0 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*1).
acc.Add(t1)
acc.Add(t8 * uint64(orderComplementWordOne))
acc.Add(t9 * uint64(orderComplementWordZero))
t1 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*2).
acc.Add(t2)
acc.Add(t8 * uint64(orderComplementWordTwo))
acc.Add(t9 * uint64(orderComplementWordOne))
acc.Add(t10 * uint64(orderComplementWordZero))
t2 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*3).
acc.Add(t3)
acc.Add(t8 * uint64(orderComplementWordThree))
acc.Add(t9 * uint64(orderComplementWordTwo))
acc.Add(t10 * uint64(orderComplementWordOne))
acc.Add(t11 * uint64(orderComplementWordZero))
t3 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*4).
acc.Add(t4)
acc.Add(t8) // * uint64(orderComplementWordFour) // * 1
acc.Add(t9 * uint64(orderComplementWordThree))
acc.Add(t10 * uint64(orderComplementWordTwo))
acc.Add(t11 * uint64(orderComplementWordOne))
acc.Add(t12 * uint64(orderComplementWordZero))
t4 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*5).
acc.Add(t5)
// acc.Add(t8 * uint64(orderComplementWordFive)) // 0
acc.Add(t9) // * uint64(orderComplementWordFour) // * 1
acc.Add(t10 * uint64(orderComplementWordThree))
acc.Add(t11 * uint64(orderComplementWordTwo))
acc.Add(t12 * uint64(orderComplementWordOne))
acc.Add(t13 * uint64(orderComplementWordZero))
t5 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*6).
acc.Add(t6)
// acc.Add(t8 * uint64(orderComplementWordSix)) // 0
// acc.Add(t9 * uint64(orderComplementWordFive)) // 0
acc.Add(t10) // * uint64(orderComplementWordFour)) // * 1
acc.Add(t11 * uint64(orderComplementWordThree))
acc.Add(t12 * uint64(orderComplementWordTwo))
acc.Add(t13 * uint64(orderComplementWordOne))
acc.Add(t14 * uint64(orderComplementWordZero))
t6 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*7).
acc.Add(t7)
// acc.Add(t8 * uint64(orderComplementWordSeven)) // 0
// acc.Add(t9 * uint64(orderComplementWordSix)) // 0
// acc.Add(t10 * uint64(orderComplementWordFive)) // 0
acc.Add(t11) // * uint64(orderComplementWordFour) // * 1
acc.Add(t12 * uint64(orderComplementWordThree))
acc.Add(t13 * uint64(orderComplementWordTwo))
acc.Add(t14 * uint64(orderComplementWordOne))
acc.Add(t15 * uint64(orderComplementWordZero))
t7 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*8).
// acc.Add(t9 * uint64(orderComplementWordSeven)) // 0
// acc.Add(t10 * uint64(orderComplementWordSix)) // 0
// acc.Add(t11 * uint64(orderComplementWordFive)) // 0
acc.Add(t12) // * uint64(orderComplementWordFour) // * 1
acc.Add(t13 * uint64(orderComplementWordThree))
acc.Add(t14 * uint64(orderComplementWordTwo))
acc.Add(t15 * uint64(orderComplementWordOne))
t8 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*9).
// acc.Add(t10 * uint64(orderComplementWordSeven)) // 0
// acc.Add(t11 * uint64(orderComplementWordSix)) // 0
// acc.Add(t12 * uint64(orderComplementWordFive)) // 0
acc.Add(t13) // * uint64(orderComplementWordFour) // * 1
acc.Add(t14 * uint64(orderComplementWordThree))
acc.Add(t15 * uint64(orderComplementWordTwo))
t9 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*10).
// acc.Add(t11 * uint64(orderComplementWordSeven)) // 0
// acc.Add(t12 * uint64(orderComplementWordSix)) // 0
// acc.Add(t13 * uint64(orderComplementWordFive)) // 0
acc.Add(t14) // * uint64(orderComplementWordFour) // * 1
acc.Add(t15 * uint64(orderComplementWordThree))
t10 = uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*11).
// acc.Add(t12 * uint64(orderComplementWordSeven)) // 0
// acc.Add(t13 * uint64(orderComplementWordSix)) // 0
// acc.Add(t14 * uint64(orderComplementWordFive)) // 0
acc.Add(t15) // * uint64(orderComplementWordFour) // * 1
t11 = uint64(acc.n[0])
acc.Rsh32()
// NOTE: All of the remaining multiplications for this iteration result in 0
// as they all involve multiplying by combinations of the fifth, sixth, and
// seventh words of the two's complement of N, which are 0, so skip them.
// Terms for 2^(32*12).
t12 = uint64(acc.n[0])
// acc.Rsh32() // No need since not used after this. Guaranteed to be 0.
// At this point, the result is reduced to fit within 385 bits, so reduce it
// again using the same method accordingly.
s.reduce385(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12)
}
// Mul2 multiplies the passed two scalars together modulo the group order in
// constant time and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s3.Mul2(s, s2).AddInt(1) so that s3 = (s * s2) + 1.
func (s *ModNScalar) Mul2(val, val2 *ModNScalar) *ModNScalar {
// This could be done with for loops and an array to store the intermediate
// terms, but this unrolled version is significantly faster.
// The overall strategy employed here is:
// 1) Calculate the 512-bit product of the two scalars using the standard
// pencil-and-paper method.
// 2) Reduce the result modulo the prime by effectively subtracting
// multiples of the group order N (actually performed by adding multiples
// of the two's complement of N to avoid implementing subtraction).
// 3) Repeat step 2 noting that each iteration reduces the required number
// of bits by 127 because the two's complement of N has 127 leading zero
// bits.
// 4) Once reduced to 256 bits, call the existing reduce method to perform
// a final reduction as needed.
//
// Note that several of the intermediate calculations require adding 64-bit
// products together which would overflow a uint64, so a 96-bit accumulator
// is used instead.
// Terms for 2^(32*0).
var acc accumulator96
acc.Add(uint64(val.n[0]) * uint64(val2.n[0]))
t0 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*1).
acc.Add(uint64(val.n[0]) * uint64(val2.n[1]))
acc.Add(uint64(val.n[1]) * uint64(val2.n[0]))
t1 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*2).
acc.Add(uint64(val.n[0]) * uint64(val2.n[2]))
acc.Add(uint64(val.n[1]) * uint64(val2.n[1]))
acc.Add(uint64(val.n[2]) * uint64(val2.n[0]))
t2 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*3).
acc.Add(uint64(val.n[0]) * uint64(val2.n[3]))
acc.Add(uint64(val.n[1]) * uint64(val2.n[2]))
acc.Add(uint64(val.n[2]) * uint64(val2.n[1]))
acc.Add(uint64(val.n[3]) * uint64(val2.n[0]))
t3 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*4).
acc.Add(uint64(val.n[0]) * uint64(val2.n[4]))
acc.Add(uint64(val.n[1]) * uint64(val2.n[3]))
acc.Add(uint64(val.n[2]) * uint64(val2.n[2]))
acc.Add(uint64(val.n[3]) * uint64(val2.n[1]))
acc.Add(uint64(val.n[4]) * uint64(val2.n[0]))
t4 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*5).
acc.Add(uint64(val.n[0]) * uint64(val2.n[5]))
acc.Add(uint64(val.n[1]) * uint64(val2.n[4]))
acc.Add(uint64(val.n[2]) * uint64(val2.n[3]))
acc.Add(uint64(val.n[3]) * uint64(val2.n[2]))
acc.Add(uint64(val.n[4]) * uint64(val2.n[1]))
acc.Add(uint64(val.n[5]) * uint64(val2.n[0]))
t5 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*6).
acc.Add(uint64(val.n[0]) * uint64(val2.n[6]))
acc.Add(uint64(val.n[1]) * uint64(val2.n[5]))
acc.Add(uint64(val.n[2]) * uint64(val2.n[4]))
acc.Add(uint64(val.n[3]) * uint64(val2.n[3]))
acc.Add(uint64(val.n[4]) * uint64(val2.n[2]))
acc.Add(uint64(val.n[5]) * uint64(val2.n[1]))
acc.Add(uint64(val.n[6]) * uint64(val2.n[0]))
t6 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*7).
acc.Add(uint64(val.n[0]) * uint64(val2.n[7]))
acc.Add(uint64(val.n[1]) * uint64(val2.n[6]))
acc.Add(uint64(val.n[2]) * uint64(val2.n[5]))
acc.Add(uint64(val.n[3]) * uint64(val2.n[4]))
acc.Add(uint64(val.n[4]) * uint64(val2.n[3]))
acc.Add(uint64(val.n[5]) * uint64(val2.n[2]))
acc.Add(uint64(val.n[6]) * uint64(val2.n[1]))
acc.Add(uint64(val.n[7]) * uint64(val2.n[0]))
t7 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*8).
acc.Add(uint64(val.n[1]) * uint64(val2.n[7]))
acc.Add(uint64(val.n[2]) * uint64(val2.n[6]))
acc.Add(uint64(val.n[3]) * uint64(val2.n[5]))
acc.Add(uint64(val.n[4]) * uint64(val2.n[4]))
acc.Add(uint64(val.n[5]) * uint64(val2.n[3]))
acc.Add(uint64(val.n[6]) * uint64(val2.n[2]))
acc.Add(uint64(val.n[7]) * uint64(val2.n[1]))
t8 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*9).
acc.Add(uint64(val.n[2]) * uint64(val2.n[7]))
acc.Add(uint64(val.n[3]) * uint64(val2.n[6]))
acc.Add(uint64(val.n[4]) * uint64(val2.n[5]))
acc.Add(uint64(val.n[5]) * uint64(val2.n[4]))
acc.Add(uint64(val.n[6]) * uint64(val2.n[3]))
acc.Add(uint64(val.n[7]) * uint64(val2.n[2]))
t9 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*10).
acc.Add(uint64(val.n[3]) * uint64(val2.n[7]))
acc.Add(uint64(val.n[4]) * uint64(val2.n[6]))
acc.Add(uint64(val.n[5]) * uint64(val2.n[5]))
acc.Add(uint64(val.n[6]) * uint64(val2.n[4]))
acc.Add(uint64(val.n[7]) * uint64(val2.n[3]))
t10 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*11).
acc.Add(uint64(val.n[4]) * uint64(val2.n[7]))
acc.Add(uint64(val.n[5]) * uint64(val2.n[6]))
acc.Add(uint64(val.n[6]) * uint64(val2.n[5]))
acc.Add(uint64(val.n[7]) * uint64(val2.n[4]))
t11 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*12).
acc.Add(uint64(val.n[5]) * uint64(val2.n[7]))
acc.Add(uint64(val.n[6]) * uint64(val2.n[6]))
acc.Add(uint64(val.n[7]) * uint64(val2.n[5]))
t12 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*13).
acc.Add(uint64(val.n[6]) * uint64(val2.n[7]))
acc.Add(uint64(val.n[7]) * uint64(val2.n[6]))
t13 := uint64(acc.n[0])
acc.Rsh32()
// Terms for 2^(32*14).
acc.Add(uint64(val.n[7]) * uint64(val2.n[7]))
t14 := uint64(acc.n[0])
acc.Rsh32()
// What's left is for 2^(32*15).
t15 := uint64(acc.n[0])
// acc.Rsh32() // No need since not used after this. Guaranteed to be 0.
// At this point, all of the terms are grouped into their respective base
// and occupy up to 512 bits. Reduce the result accordingly.
s.reduce512(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14,
t15)
return s
}
// Mul multiplies the passed scalar with the existing one modulo the group order
// in constant time and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.Mul(s2).AddInt(1) so that s = (s * s2) + 1.
func (s *ModNScalar) Mul(val *ModNScalar) *ModNScalar {
return s.Mul2(s, val)
}
// SquareVal squares the passed scalar modulo the group order in constant time
// and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s3.SquareVal(s).Mul(s) so that s3 = s^2 * s = s^3.
func (s *ModNScalar) SquareVal(val *ModNScalar) *ModNScalar {
// This could technically be optimized slightly to take advantage of the
// fact that many of the intermediate calculations in squaring are just
// doubling, however, benchmarking has shown that due to the need to use a
// 96-bit accumulator, any savings are essentially offset by that and
// consequently there is no real difference in performance over just
// multiplying the value by itself to justify the extra code for now. This
// can be revisited in the future if it becomes a bottleneck in practice.
return s.Mul2(val, val)
}
// Square squares the scalar modulo the group order in constant time. The
// existing scalar is modified.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.Square().Mul(s2) so that s = s^2 * s2.
func (s *ModNScalar) Square() *ModNScalar {
return s.SquareVal(s)
}
// NegateVal negates the passed scalar modulo the group order and stores the
// result in s in constant time.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.NegateVal(s2).AddInt(1) so that s = -s2 + 1.
func (s *ModNScalar) NegateVal(val *ModNScalar) *ModNScalar {
// Since the scalar is already in the range 0 <= val < N, where N is the
// group order, negation modulo the group order is just the group order
// minus the value. This implies that the result will always be in the
// desired range with the sole exception of 0 because N - 0 = N itself.
//