Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: direct Fp6 extension for BW6-761 #1126

Merged
merged 26 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4839f7b
feat(bw6): Fp6 as a direct extension using Montgomery-6
yelhousni Apr 25, 2024
ad255d3
fix(bw6): pairing using direct sextic extension
yelhousni Apr 25, 2024
931edcd
perf(bw6): use Karabina12345 instead of GS for small sizes too
yelhousni Apr 25, 2024
e2b4816
perf(bw6): optimize pairing with new tower
yelhousni Apr 25, 2024
650f879
perf(bw6): optimize Montgomery-6 mul
yelhousni Apr 25, 2024
37e3874
perf(bw6): optimize specialized Montgomery-6 mul
yelhousni Apr 25, 2024
4df060a
perf(bw6): save some adds in specialized mul e6
yelhousni Apr 26, 2024
72e558a
perf(bw6): sparse mul by lines
yelhousni Apr 26, 2024
a094956
perf(bw6): Square uses Karatsuba over Chung-Hasan instead of TC6
yelhousni Apr 27, 2024
00c01e5
perf(bw6): save some subs in Fp6 square
yelhousni Apr 27, 2024
a203ff8
perf(bw6): mulby02345
yelhousni Apr 29, 2024
0351ff1
perf(bw6): optimize mulby023
yelhousni Apr 29, 2024
ece2c04
refactor(bw6): remove dead code
yelhousni Apr 29, 2024
f3af4b3
perf(bw6): save 2 subs in fp6 sq
yelhousni May 1, 2024
909e11e
refactor: clean code
yelhousni May 1, 2024
31d61bc
fix(bw6): Toom-Cook 6-way mul
yelhousni May 3, 2024
b27d4a5
perf(bw6): use hint to divide by 362880 in Toom-6
yelhousni May 3, 2024
0900463
refactor: clean code
yelhousni May 3, 2024
471563d
test: update stats
yelhousni May 3, 2024
f10da89
Merge branch 'master' into perf/direct-extensions
yelhousni May 4, 2024
d3ae586
Merge branch 'master' into perf/direct-extensions
yelhousni May 4, 2024
de595f7
refactor(bw6/Fp6-mul): record some common additions
yelhousni May 6, 2024
a4e6b23
test: update stats
yelhousni May 6, 2024
9c26d64
refactor(bw6): remove benchmark
yelhousni May 6, 2024
e621951
refactor(bw6): remove benchmark
yelhousni May 6, 2024
5d27c86
refactor(bw6): apply review suggestion
yelhousni May 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified internal/stats/latest.stats
Binary file not shown.
506 changes: 0 additions & 506 deletions std/algebra/emulated/fields_bw6761/e3.go

This file was deleted.

410 changes: 0 additions & 410 deletions std/algebra/emulated/fields_bw6761/e3_test.go

This file was deleted.

1,176 changes: 833 additions & 343 deletions std/algebra/emulated/fields_bw6761/e6.go

Large diffs are not rendered by default.

278 changes: 166 additions & 112 deletions std/algebra/emulated/fields_bw6761/e6_pairing.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func (e Ext6) ExpX0Minus1(z *E6) *E6 {
result = e.Mul(result, z33)
result = e.nSquareKarabina12345(result, 4)
result = e.Mul(result, z)
result = e.CyclotomicSquare(result)
result = e.nSquareKarabina12345(result, 1)
result = e.Mul(result, z)
result = e.nSquareKarabina12345(result, 46)

Expand All @@ -39,11 +39,11 @@ func (e Ext6) ExpX0Minus1Square(z *E6) *E6 {
z = e.Reduce(z)
result := e.Copy(z)
result = e.nSquareKarabina12345(result, 3)
t0 := e.CyclotomicSquare(result)
t0 := e.nSquareKarabina12345(result, 1)
t2 := e.Mul(z, t0)
result = e.Mul(result, t2)
t0 = e.Mul(z, result)
t1 := e.CyclotomicSquare(t0)
t1 := e.nSquareKarabina12345(t0, 1)
t1 = e.Mul(t2, t1)
t3 := e.nSquareKarabina12345(t1, 7)
t2 = e.Mul(t2, t3)
Expand All @@ -65,15 +65,15 @@ func (e Ext6) ExpX0Minus1Square(z *E6) *E6 {
func (e Ext6) ExpX0Plus1(z *E6) *E6 {
z = e.Reduce(z)
result := e.Copy(z)
t := e.CyclotomicSquare(result)
t := e.nSquareKarabina12345(result, 1)
result = e.nSquareKarabina12345(t, 4)
result = e.Mul(result, z)
z33 := e.Copy(result)
result = e.nSquareKarabina12345(result, 7)
result = e.Mul(result, z33)
result = e.nSquareKarabina12345(result, 4)
result = e.Mul(result, z)
result = e.CyclotomicSquare(result)
result = e.nSquareKarabina12345(result, 1)
result = e.Mul(result, z)
result = e.nSquareKarabina12345(result, 46)
result = e.Mul(result, t)
Expand All @@ -86,10 +86,9 @@ func (e Ext6) ExpX0Plus1(z *E6) *E6 {
func (e Ext6) ExptMinus1Div3(z *E6) *E6 {
z = e.Reduce(z)
result := e.Copy(z)
result = e.CyclotomicSquare(result)
result = e.CyclotomicSquare(result)
result = e.nSquareKarabina12345(result, 2)
result = e.Mul(result, z)
result = e.CyclotomicSquare(result)
result = e.nSquareKarabina12345(result, 1)
result = e.Mul(result, z)
t0 := e.nSquareKarabina12345(result, 7)
result = e.Mul(result, t0)
Expand All @@ -106,10 +105,9 @@ func (e Ext6) ExptMinus1Div3(z *E6) *E6 {
func (e Ext6) ExpC1(z *E6) *E6 {
z = e.Reduce(z)
result := e.Copy(z)
result = e.CyclotomicSquare(result)
result = e.CyclotomicSquare(result)
result = e.nSquareKarabina12345(result, 2)
result = e.Mul(result, z)
result = e.CyclotomicSquare(result)
result = e.nSquareKarabina12345(result, 1)
result = e.Mul(result, z)

return result
Expand All @@ -120,68 +118,94 @@ func (e Ext6) ExpC1(z *E6) *E6 {
// C2 = (ht**2+3*hy**2)/4 = 103
func (e Ext6) ExpC2(z *E6) *E6 {
z = e.Reduce(z)
result := e.CyclotomicSquare(z)
result := e.nSquareKarabina12345(z, 1)
result = e.Mul(result, z)
t0 := e.nSquareKarabina12345(result, 4)
result = e.Mul(result, t0)
result = e.CyclotomicSquare(result)
result = e.nSquareKarabina12345(result, 1)
result = e.Mul(result, z)

return result
}

// MulBy014 multiplies z by an E6 sparse element of the form
// MulBy023 multiplies z by an E6 sparse element of the form
//
// E6{
// B0: E3{A0: c0, A1: c1, A2: 0},
// B1: E3{A0: 0, A1: 1, A2: 0},
// }
func (e *Ext6) MulBy014(z *E6, c0, c1 *baseEl) *E6 {
// E6{A0: c0, A1: 0, A2: c1, A3: 1, A4: 0, A5: 0}
func (e *Ext6) MulBy023(z *E6, c0, c1 *baseEl) *E6 {
z = e.Reduce(z)

a := e.MulBy01(&z.B0, c0, c1)
a := e.fp.Mul(&z.A0, c0)
b := e.fp.Mul(&z.A2, c1)
tmp := e.fp.Add(&z.A2, &z.A4)
a0 := e.fp.Mul(c1, tmp)
a0 = e.fp.Sub(b, a0)
a0 = e.fp.MulConst(a0, big.NewInt(4))
a0 = e.fp.Add(a0, a)
a2 := e.fp.Mul(&z.A4, c0)
a2 = e.fp.Add(a2, b)
a1 := e.fp.Add(c0, c1)
tmp = e.fp.Add(&z.A0, &z.A2)
a1 = e.fp.Mul(a1, tmp)
a1 = e.fp.Sub(a1, a)
a1 = e.fp.Sub(a1, b)

var b E3
// Mul by E3{0, 1, 0}
b.A0 = *e.fp.MulConst(&z.B1.A2, big.NewInt(4))
b.A2 = *e.fp.Neg(&z.B1.A1)
b.A1 = *e.fp.Neg(&z.B1.A0)
b0 := e.fp.MulConst(&z.A5, big.NewInt(4))
b2 := e.fp.Neg(&z.A3)
b1 := e.fp.Neg(&z.A1)

one := e.fp.One()
d := e.fp.Add(c1, one)

zC1 := e.Ext3.Add(&z.B1, &z.B0)
zC1 = e.Ext3.MulBy01(zC1, c0, d)
zC1 = e.Ext3.Sub(zC1, a)
zC1 = e.Ext3.Add(zC1, &b)
zC0 := &E3{
A0: *e.fp.MulConst(&b.A2, big.NewInt(4)),
A1: *e.fp.Neg(&b.A0),
A2: *e.fp.Neg(&b.A1),
}
zC10 := e.fp.Add(&z.A1, &z.A0)
zC11 := e.fp.Add(&z.A3, &z.A2)
zC12 := e.fp.Add(&z.A5, &z.A4)

a = e.fp.Mul(zC10, c0)
b = e.fp.Mul(zC11, d)
tmp = e.fp.Add(zC11, zC12)
t0 := e.fp.Mul(d, tmp)
t0 = e.fp.Sub(b, t0)
t0 = e.fp.MulConst(t0, big.NewInt(4))
t0 = e.fp.Add(t0, a)
t2 := e.fp.Mul(zC12, c0)
t2 = e.fp.Add(t2, b)
t1 := e.fp.Add(c0, d)
tmp = e.fp.Add(zC10, zC11)
t1 = e.fp.Mul(t1, tmp)
t1 = e.fp.Sub(t1, a)
t1 = e.fp.Sub(t1, b)

zC10 = e.fp.Sub(t0, a0)
zC11 = e.fp.Sub(t1, a1)
zC12 = e.fp.Sub(t2, a2)

zC0 = e.Ext3.Add(zC0, a)
zC10 = e.fp.Add(zC10, b0)
zC11 = e.fp.Add(zC11, b1)
zC12 = e.fp.Add(zC12, b2)

zC00 := e.fp.Add(a0, e.fp.MulConst(b2, big.NewInt(4)))
zC01 := e.fp.Sub(a1, b0)
zC02 := e.fp.Sub(a2, b1)

return &E6{
B0: *zC0,
B1: *zC1,
A0: *zC00,
A1: *zC10,
A2: *zC01,
A3: *zC11,
A4: *zC02,
A5: *zC12,
}

}

// multiplies two E6 sparse elements of the form:
// Mul023By023 multiplies two E6 sparse elements of the form:
//
// E6{
// B0: E3{A0: c0, A1: c1, A2: 0},
// B1: E3{A0: 0, A1: 1, A2: 0},
// }
// E6{A0: c0, A1: 0, A2: c1, A3: 1, A4: 0, A5: 0}
//
// and
//
// E6{
// B0: E3{A0: d0, A1: d1, A2: 0},
// B1: E3{A0: 0, A1: 1, A2: 0},
// }
func (e Ext6) Mul014By014(d0, d1, c0, c1 *baseEl) [5]*baseEl {
// E6{A0: d0, A1: 0, A2: d1, A3: 1, A4: 0, A5: 0}
func (e Ext6) Mul023By023(d0, d1, c0, c1 *baseEl) [5]*baseEl {
x0 := e.fp.Mul(c0, d0)
x1 := e.fp.Mul(c1, d1)
x04 := e.fp.Add(c0, d0)
Expand All @@ -192,78 +216,108 @@ func (e Ext6) Mul014By014(d0, d1, c0, c1 *baseEl) [5]*baseEl {
x01 = e.fp.Sub(x01, tmp)
x14 := e.fp.Add(c1, d1)

four := emulated.ValueOf[emulated.BW6761Fp](big.NewInt(4))
zC0B0 := e.fp.Sub(x0, &four)
minusFour := emulated.ValueOf[emulated.BW6761Fp]("6891450384315732539396789682275657542479668912536150109513790160209623422243491736087683183289411687640864567753786613451161759120554247759349511699125301598951605099378508850372543631423596795951899700429969112842764913119068295") // -4 % p
zC0B0 := e.fp.Add(x0, &minusFour)

return [5]*baseEl{zC0B0, x01, x1, x04, x14}
return [5]*baseEl{zC0B0, x01, x04, x1, x14}
}

// MulBy01245 multiplies z by an E6 sparse element of the form
// MulBy02345 multiplies z by an E6 sparse element of the form
//
// E6{
// B0: E3{A0: c0, A1: c1, A2: c2},
// B1: E3{A0: 0, A1: c4, A2: c5},
// E6{A0: y0, A1: 0, A2: y1, A3: y2, A4: y3, A5: y4},
// }
func (e *Ext6) MulBy01245(z *E6, x [5]*baseEl) *E6 {
c0 := &E3{A0: *x[0], A1: *x[1], A2: *x[2]}
a := e.Ext3.Add(&z.B0, &z.B1)
b := &E3{
A0: c0.A0,
A1: *e.fp.Add(&c0.A1, x[3]),
A2: *e.fp.Add(&c0.A2, x[4]),
}
a = e.Ext3.Mul(a, b)
b = e.Ext3.Mul(&z.B0, c0)
c := e.Ext3.MulBy12(&z.B1, x[3], x[4])
z1 := e.Ext3.Sub(a, b)
z1 = e.Ext3.Sub(z1, c)
z0 := e.Ext3.MulByNonResidue(c)
z0 = e.Ext3.Add(z0, b)
return &E6{
B0: *z0,
B1: *z1,
}
}
func (e *Ext6) MulBy02345(z *E6, x [5]*baseEl) *E6 {
a0 := e.fp.Add(&z.A0, &z.A1)
a1 := e.fp.Add(&z.A2, &z.A3)
a2 := e.fp.Add(&z.A4, &z.A5)

// Mul01245By014 multiplies two E6 sparse elements of the form
//
// E6{
// C0: E3{B0: x0, B1: x1, B2: x2},
// C1: E3{B0: 0, B1: x4, B2: x5},
// }
//
// and
//
// E6{
// C0: E3{B0: d0, B1: d1, B2: 0},
// C1: E3{B0: 0, B1: 1, B2: 0},
// }
func (e *Ext6) Mul01245By014(x [5]*baseEl, d0, d1 *baseEl) *E6 {
zero := e.fp.Zero()
c0 := &E3{A0: *x[0], A1: *x[1], A2: *x[2]}
b := &E3{
A0: *x[0],
A1: *e.fp.Add(x[1], x[3]),
A2: *e.fp.Add(x[2], x[4]),
}
a := e.Ext3.MulBy01(b, d0, e.fp.Add(d1, e.fp.One()))
b = e.Ext3.MulBy01(c0, d0, d1)
c := &E3{
A0: *e.fp.MulConst(x[4], big.NewInt(4)),
A1: *e.fp.Neg(zero),
A2: *e.fp.Neg(x[3]),
}
z1 := e.Ext3.Sub(a, b)
z1 = e.Ext3.Add(z1, c)
z0 := &E3{
A0: *e.fp.MulConst(&c.A2, big.NewInt(4)),
A1: *e.fp.Neg(&c.A0),
A2: *e.fp.Neg(&c.A1),
}
b1 := e.fp.Add(x[1], x[2])
b2 := e.fp.Add(x[3], x[4])

t0 := e.fp.Mul(a0, x[0])
t1 := e.fp.Mul(a1, b1)
t2 := e.fp.Mul(a2, b2)
c0 := e.fp.Add(a1, a2)
tmp := e.fp.Add(b1, b2)
c0 = e.fp.Mul(c0, tmp)
c0 = e.fp.Sub(c0, t1)
c0 = e.fp.Sub(t2, c0)
c0 = e.fp.MulConst(c0, big.NewInt(4))
tmp = e.fp.Add(a0, a2)
c2 := e.fp.Add(x[0], b2)
c2 = e.fp.Mul(c2, tmp)
c2 = e.fp.Sub(c2, t0)
c2 = e.fp.Sub(c2, t2)
c1 := e.fp.Add(a0, a1)
tmp = e.fp.Add(x[0], b1)
c1 = e.fp.Mul(c1, tmp)
c1 = e.fp.Sub(c1, t0)
c1 = e.fp.Sub(c1, t1)
t2 = e.mulFpByNonResidue(e.fp, t2)
a0 = e.fp.Add(c0, t0)
a1 = e.fp.Add(c1, t2)
a2 = e.fp.Add(c2, t1)

t0 = e.fp.Mul(&z.A0, x[0])
t1 = e.fp.Mul(&z.A2, x[1])
t2 = e.fp.Mul(&z.A4, x[3])
c0 = e.fp.Add(&z.A2, &z.A4)
tmp = e.fp.Add(x[1], x[3])
c0 = e.fp.Mul(c0, tmp)
c0 = e.fp.Sub(c0, t1)
c0 = e.fp.Sub(t2, c0)
c0 = e.fp.MulConst(c0, big.NewInt(4))
tmp = e.fp.Add(&z.A0, &z.A4)
c2 = e.fp.Add(x[0], x[3])
c2 = e.fp.Mul(c2, tmp)
c2 = e.fp.Sub(c2, t0)
c2 = e.fp.Sub(c2, t2)
c1 = e.fp.Add(&z.A0, &z.A2)
tmp = e.fp.Add(x[0], x[1])
c1 = e.fp.Mul(c1, tmp)
c1 = e.fp.Sub(c1, t0)
c1 = e.fp.Sub(c1, t1)
t2 = e.mulFpByNonResidue(e.fp, t2)
b0 := e.fp.Add(c0, t0)
b1 = e.fp.Add(c1, t2)
b2 = e.fp.Add(c2, t1)

t1 = e.fp.Mul(&z.A3, x[2])
t2 = e.fp.Mul(&z.A5, x[4])
c0 = e.fp.Add(&z.A3, &z.A5)
tmp = e.fp.Add(x[2], x[4])
c0 = e.fp.Mul(c0, tmp)
c0 = e.fp.Sub(c0, t1)
c0 = e.fp.Sub(t2, c0)
c0 = e.fp.MulConst(c0, big.NewInt(4))
c1 = e.fp.Add(&z.A1, &z.A3)
c1 = e.fp.Mul(c1, x[2])
c1 = e.fp.Sub(c1, t1)
tmp = e.mulFpByNonResidue(e.fp, t2)
c1 = e.fp.Add(c1, tmp)
tmp = e.fp.Add(&z.A1, &z.A5)
c2 = e.fp.Mul(x[4], tmp)
c2 = e.fp.Sub(c2, t2)
c2 = e.fp.Add(c2, t1)

tmp = e.fp.Add(b0, c0)
z10 := e.fp.Sub(a0, tmp)
tmp = e.fp.Add(b1, c1)
z11 := e.fp.Sub(a1, tmp)
tmp = e.fp.Add(b2, c2)
z12 := e.fp.Sub(a2, tmp)

z00 := e.mulFpByNonResidue(e.fp, c2)
z00 = e.fp.Add(z00, b0)
z01 := e.fp.Add(c0, b1)
z02 := e.fp.Add(c1, b2)

z0 = e.Ext3.Add(z0, b)
return &E6{
B0: *z0,
B1: *z1,
A0: *z00,
A1: *z10,
A2: *z01,
A3: *z11,
A4: *z02,
A5: *z12,
}
}
Loading
Loading