Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perf: emulated pairing BN254 #714

Merged
merged 2 commits into from Jul 4, 2023
Merged

Perf: emulated pairing BN254 #714

merged 2 commits into from Jul 4, 2023

Conversation

yelhousni
Copy link
Contributor

@yelhousni yelhousni commented Jun 2, 2023

One more optimization found while writing the blog post: in the case of multi-pairings, when bit=0 (i.e. `loopCounter[i] == 0`), we can first store the lines in a first loop and then iterate over them in a second loop, multiplying them together 2-by-2 before multiplying by the accumulator.

l1 := make([]*lineEvaluation, n)

// ...

switch loopCounter[i] {

case 0:
        // precompute lines
        for k := 0; k < n; k++ {
                // Qacc[k] ← 2Qacc[k] and l1 the tangent ℓ passing 2Qacc[k]
                Qacc[k], l1[k] = pr.doubleStep(Qacc[k])

                // line evaluation at P[k]
                l1[k].R0 = *pr.MulByElement(&l1[k].R0, xOverY[k])
                l1[k].R1 = *pr.MulByElement(&l1[k].R1, yInv[k])

        }

        // if number of lines is odd, mul last line by res
        // works for n=1 as well
        if n%2 != 0 {
                // ℓ × res
                res = pr.MulBy034(res, &l1[n-1].R0, &l1[n-1].R1)

        }

        // mul lines 2-by-2
        for k := 1; k < n; k += 2 {
                // ℓ × ℓ
                prodLines = *pr.Mul034By034(&l1[k].R0, &l1[k].R1, &l1[k-1].R0, &l1[k-1].R1)
                // (ℓ × ℓ) × res
                res = pr.MulBy01234(res, &prodLines)

        }
        // ...
}

This saves quite a few constraints, and the savings grow with the batch size. For example:

  • Batch of size 2: -3 588 r1cs
  • Batch of size 9: -89 441 r1cs

P.S.: this is not worth it for BLS12-381, as Mul014By014 (2-by-2 line multiplication for an M-type twist and quadratic final sub-extension) is not efficient circuit-wise compared to 2 plain multiplications by a line (MulBy014).

P.P.S.: this has a direct performance impact on the ECPAIR precompile if we keep the 2-by-2 MillerLoop logic or increase it.

@ivokub
Copy link
Collaborator

ivokub commented Jun 23, 2023

Suggested edit:

diff --git a/std/algebra/emulated/sw_bn254/pairing_test.go b/std/algebra/emulated/sw_bn254/pairing_test.go
index 30fdd148..4623245e 100644
--- a/std/algebra/emulated/sw_bn254/pairing_test.go
+++ b/std/algebra/emulated/sw_bn254/pairing_test.go
@@ -112,66 +112,16 @@ func (c *MultiPairCircuit) Define(api frontend.API) error {
 	}
 	pairing.AssertIsOnG1(&c.InG1)
 	pairing.AssertIsOnG2(&c.InG2)
-	switch c.n {
-	case 2:
-		res, err := pairing.Pair([]*G1Affine{&c.InG1, &c.InG1}, []*G2Affine{&c.InG2, &c.InG2})
-		if err != nil {
-			return fmt.Errorf("pair: %w", err)
-		}
-		pairing.AssertIsEqual(res, &c.Res)
-
-	case 3:
-		res, err := pairing.Pair([]*G1Affine{&c.InG1, &c.InG1, &c.InG1}, []*G2Affine{&c.InG2, &c.InG2, &c.InG2})
-		if err != nil {
-			return fmt.Errorf("pair: %w", err)
-		}
-		pairing.AssertIsEqual(res, &c.Res)
-
-	case 4:
-		res, err := pairing.Pair([]*G1Affine{&c.InG1, &c.InG1, &c.InG1, &c.InG1}, []*G2Affine{&c.InG2, &c.InG2, &c.InG2, &c.InG2})
-		if err != nil {
-			return fmt.Errorf("pair: %w", err)
-		}
-		pairing.AssertIsEqual(res, &c.Res)
-
-	case 5:
-		res, err := pairing.Pair([]*G1Affine{&c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1}, []*G2Affine{&c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2})
-		if err != nil {
-			return fmt.Errorf("pair: %w", err)
-		}
-		pairing.AssertIsEqual(res, &c.Res)
-
-	case 6:
-		res, err := pairing.Pair([]*G1Affine{&c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1}, []*G2Affine{&c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2})
-		if err != nil {
-			return fmt.Errorf("pair: %w", err)
-		}
-		pairing.AssertIsEqual(res, &c.Res)
-
-	case 7:
-		res, err := pairing.Pair([]*G1Affine{&c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1}, []*G2Affine{&c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2})
-		if err != nil {
-			return fmt.Errorf("pair: %w", err)
-		}
-		pairing.AssertIsEqual(res, &c.Res)
-
-	case 8:
-		res, err := pairing.Pair([]*G1Affine{&c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1}, []*G2Affine{&c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2})
-		if err != nil {
-			return fmt.Errorf("pair: %w", err)
-		}
-		pairing.AssertIsEqual(res, &c.Res)
-
-	case 9:
-		res, err := pairing.Pair([]*G1Affine{&c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1, &c.InG1}, []*G2Affine{&c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2, &c.InG2})
-		if err != nil {
-			return fmt.Errorf("pair: %w", err)
-		}
-		pairing.AssertIsEqual(res, &c.Res)
-	default:
-		return fmt.Errorf("not handled %d", c.n)
-
+	P, Q := []*G1Affine{}, []*G2Affine{}
+	for i := 0; i < c.n; i++ {
+		P = append(P, &c.InG1)
+		Q = append(Q, &c.InG2)
 	}
+	res, err := pairing.Pair(P, Q)
+	if err != nil {
+		return fmt.Errorf("pair: %w", err)
+	}
+	pairing.AssertIsEqual(res, &c.Res)
 	return nil
 }
 
@@ -195,7 +145,6 @@ func TestMultiPairTestSolve(t *testing.T) {
 		}
 		err = test.IsSolved(&MultiPairCircuit{n: i}, &witness, ecc.BN254.ScalarField())
 		assert.NoError(err)
-		fmt.Println("Batch of size", i, "✅")
 	}
 }
 

@ivokub
Copy link
Collaborator

ivokub commented Jun 23, 2023

Suggested edit:

diff --git a/std/algebra/emulated/sw_bn254/pairing.go b/std/algebra/emulated/sw_bn254/pairing.go
index e1f523d8..6dbf11af 100644
--- a/std/algebra/emulated/sw_bn254/pairing.go
+++ b/std/algebra/emulated/sw_bn254/pairing.go
@@ -329,8 +329,7 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 	res := pr.Ext12.One()
 	var prodLines [5]fields_bn254.E2
 
-	l1 := make([]*lineEvaluation, n)
-	l2 := make([]*lineEvaluation, n)
+	var l1, l2 *lineEvaluation
 	Qacc := make([]*G2Affine, n)
 	QNeg := make([]*G2Affine, n)
 	yInv := make([]*emulated.Element[emulated.BN254Fp], n)
@@ -354,22 +353,22 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 
 	// k = 0, separately to avoid MulBy034 (res × ℓ)
 	// (assign line to res)
-	Qacc[0], l1[0] = pr.doubleStep(Qacc[0])
+	Qacc[0], l1 = pr.doubleStep(Qacc[0])
 	// line evaluation at P[0]
-	res.C1.B0 = *pr.MulByElement(&l1[0].R0, xOverY[0])
-	res.C1.B1 = *pr.MulByElement(&l1[0].R1, yInv[0])
+	res.C1.B0 = *pr.MulByElement(&l1.R0, xOverY[0])
+	res.C1.B1 = *pr.MulByElement(&l1.R1, yInv[0])
 
 	if n >= 2 {
 		// k = 1, separately to avoid MulBy034 (res × ℓ)
 		// (res is also a line at this point, so we use Mul034By034 ℓ × ℓ)
-		Qacc[1], l1[1] = pr.doubleStep(Qacc[1])
+		Qacc[1], l1 = pr.doubleStep(Qacc[1])
 
 		// line evaluation at P[1]
-		l1[1].R0 = *pr.MulByElement(&l1[1].R0, xOverY[1])
-		l1[1].R1 = *pr.MulByElement(&l1[1].R1, yInv[1])
+		l1.R0 = *pr.MulByElement(&l1.R0, xOverY[1])
+		l1.R1 = *pr.MulByElement(&l1.R1, yInv[1])
 
 		// ℓ × res
-		prodLines = *pr.Mul034By034(&l1[1].R0, &l1[1].R1, &res.C1.B0, &res.C1.B1)
+		prodLines = *pr.Mul034By034(&l1.R0, &l1.R1, &res.C1.B0, &res.C1.B1)
 		res.C0.B0 = prodLines[0]
 		res.C0.B1 = prodLines[1]
 		res.C0.B2 = prodLines[2]
@@ -380,26 +379,26 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 	if n >= 3 {
 		// k = 2, separately to avoid MulBy034 (res × ℓ)
 		// (res has a zero E2 element, so we use Mul01234By034)
-		Qacc[2], l1[2] = pr.doubleStep(Qacc[2])
+		Qacc[2], l1 = pr.doubleStep(Qacc[2])
 
 		// line evaluation at P[1]
-		l1[2].R0 = *pr.MulByElement(&l1[2].R0, xOverY[2])
-		l1[2].R1 = *pr.MulByElement(&l1[2].R1, yInv[2])
+		l1.R0 = *pr.MulByElement(&l1.R0, xOverY[2])
+		l1.R1 = *pr.MulByElement(&l1.R1, yInv[2])
 
 		// ℓ × res
-		res = pr.Mul01234By034(&prodLines, &l1[2].R0, &l1[2].R1)
+		res = pr.Mul01234By034(&prodLines, &l1.R0, &l1.R1)
 
 		// k >= 3
 		for k := 3; k < n; k++ {
-			// Qacc[k] ← 2Qacc[k] and l1[k] the tangent ℓ passing 2Qacc[k]
-			Qacc[k], l1[k] = pr.doubleStep(Qacc[k])
+			// Qacc[k] ← 2Qacc[k] and l1 the tangent ℓ passing 2Qacc[k]
+			Qacc[k], l1 = pr.doubleStep(Qacc[k])
 
 			// line evaluation at P[k]
-			l1[k].R0 = *pr.MulByElement(&l1[k].R0, xOverY[k])
-			l1[k].R1 = *pr.MulByElement(&l1[k].R1, yInv[k])
+			l1.R0 = *pr.MulByElement(&l1.R0, xOverY[k])
+			l1.R1 = *pr.MulByElement(&l1.R1, yInv[k])
 
 			// ℓ × res
-			res = pr.MulBy034(res, &l1[k].R0, &l1[k].R1)
+			res = pr.MulBy034(res, &l1.R0, &l1.R1)
 		}
 	}
 
@@ -409,22 +408,22 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 	res = pr.Square(res)
 	for k := 0; k < n; k++ {
 		// l2 the line passing Qacc[k] and -Q
-		l2[k] = pr.lineCompute(Qacc[k], QNeg[k])
+		l2 = pr.lineCompute(Qacc[k], QNeg[k])
 
 		// line evaluation at P[k]
-		l2[k].R0 = *pr.MulByElement(&l2[k].R0, xOverY[k])
-		l2[k].R1 = *pr.MulByElement(&l2[k].R1, yInv[k])
+		l2.R0 = *pr.MulByElement(&l2.R0, xOverY[k])
+		l2.R1 = *pr.MulByElement(&l2.R1, yInv[k])
 
 		// Qacc[k] ← Qacc[k]+Q[k] and
-		// l1[k] the line ℓ passing Qacc[k] and Q[k]
-		Qacc[k], l1[k] = pr.addStep(Qacc[k], Q[k])
+		// l1 the line ℓ passing Qacc[k] and Q[k]
+		Qacc[k], l1 = pr.addStep(Qacc[k], Q[k])
 
 		// line evaluation at P[k]
-		l1[k].R0 = *pr.MulByElement(&l1[k].R0, xOverY[k])
-		l1[k].R1 = *pr.MulByElement(&l1[k].R1, yInv[k])
+		l1.R0 = *pr.MulByElement(&l1.R0, xOverY[k])
+		l1.R1 = *pr.MulByElement(&l1.R1, yInv[k])
 
 		// ℓ × ℓ
-		prodLines = *pr.Mul034By034(&l1[k].R0, &l1[k].R1, &l2[k].R0, &l2[k].R1)
+		prodLines = *pr.Mul034By034(&l1.R0, &l1.R1, &l2.R0, &l2.R1)
 		// (ℓ × ℓ) × res
 		res = pr.MulBy01234(res, &prodLines)
 	}
@@ -468,20 +467,20 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 		case 1:
 			for k := 0; k < n; k++ {
 				// Qacc[k] ← 2Qacc[k]+Q[k],
-				// l1[k] the line ℓ passing Qacc[k] and Q[k]
-				// l2[k] the line ℓ passing (Qacc[k]+Q[k]) and Qacc[k]
-				Qacc[k], l1[k], l2[k] = pr.doubleAndAddStep(Qacc[k], Q[k])
+				// l1 the line ℓ passing Qacc[k] and Q[k]
+				// l2 the line ℓ passing (Qacc[k]+Q[k]) and Qacc[k]
+				Qacc[k], l1, l2 = pr.doubleAndAddStep(Qacc[k], Q[k])
 
 				// line evaluation at P[k]
-				l1[k].R0 = *pr.MulByElement(&l1[k].R0, xOverY[k])
-				l1[k].R1 = *pr.MulByElement(&l1[k].R1, yInv[k])
+				l1.R0 = *pr.MulByElement(&l1.R0, xOverY[k])
+				l1.R1 = *pr.MulByElement(&l1.R1, yInv[k])
 
 				// line evaluation at P[k]
-				l2[k].R0 = *pr.MulByElement(&l2[k].R0, xOverY[k])
-				l2[k].R1 = *pr.MulByElement(&l2[k].R1, yInv[k])
+				l2.R0 = *pr.MulByElement(&l2.R0, xOverY[k])
+				l2.R1 = *pr.MulByElement(&l2.R1, yInv[k])
 
 				// ℓ × ℓ
-				prodLines = *pr.Mul034By034(&l1[k].R0, &l1[k].R1, &l2[k].R0, &l2[k].R1)
+				prodLines = *pr.Mul034By034(&l1.R0, &l1.R1, &l2.R0, &l2.R1)
 				// (ℓ × ℓ) × res
 				res = pr.MulBy01234(res, &prodLines)
 
@@ -490,20 +489,20 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 		case -1:
 			for k := 0; k < n; k++ {
 				// Qacc[k] ← 2Qacc[k]-Q[k],
-				// l1[k] the line ℓ passing Qacc[k] and -Q[k]
-				// l2[k] the line ℓ passing (Qacc[k]-Q[k]) and Qacc[k]
-				Qacc[k], l1[k], l2[k] = pr.doubleAndAddStep(Qacc[k], QNeg[k])
+				// l1 the line ℓ passing Qacc[k] and -Q[k]
+				// l2 the line ℓ passing (Qacc[k]-Q[k]) and Qacc[k]
+				Qacc[k], l1, l2 = pr.doubleAndAddStep(Qacc[k], QNeg[k])
 
 				// line evaluation at P[k]
-				l1[k].R0 = *pr.MulByElement(&l1[k].R0, xOverY[k])
-				l1[k].R1 = *pr.MulByElement(&l1[k].R1, yInv[k])
+				l1.R0 = *pr.MulByElement(&l1.R0, xOverY[k])
+				l1.R1 = *pr.MulByElement(&l1.R1, yInv[k])
 
 				// line evaluation at P[k]
-				l2[k].R0 = *pr.MulByElement(&l2[k].R0, xOverY[k])
-				l2[k].R1 = *pr.MulByElement(&l2[k].R1, yInv[k])
+				l2.R0 = *pr.MulByElement(&l2.R0, xOverY[k])
+				l2.R1 = *pr.MulByElement(&l2.R1, yInv[k])
 
 				// ℓ × ℓ
-				prodLines = *pr.Mul034By034(&l1[k].R0, &l1[k].R1, &l2[k].R0, &l2[k].R1)
+				prodLines = *pr.Mul034By034(&l1.R0, &l1.R1, &l2.R0, &l2.R1)
 				// (ℓ × ℓ) × res
 				res = pr.MulBy01234(res, &prodLines)
 
@@ -529,21 +528,21 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 		Q2.Y = *pr.Ext2.Neg(&Q2.Y)
 
 		// Qacc[k] ← Qacc[k]+π(Q) and
-		// l1[k] the line passing Qacc[k] and π(Q)
-		Qacc[k], l1[k] = pr.addStep(Qacc[k], Q1)
+		// l1 the line passing Qacc[k] and π(Q)
+		Qacc[k], l1 = pr.addStep(Qacc[k], Q1)
 
 		// line evaluation at P[k]
-		l1[k].R0 = *pr.Ext2.MulByElement(&l1[k].R0, xOverY[k])
-		l1[k].R1 = *pr.Ext2.MulByElement(&l1[k].R1, yInv[k])
+		l1.R0 = *pr.Ext2.MulByElement(&l1.R0, xOverY[k])
+		l1.R1 = *pr.Ext2.MulByElement(&l1.R1, yInv[k])
 
-		// l2[k] the line passing Qacc[k] and -π²(Q)
-		l2[k] = pr.lineCompute(Qacc[k], Q2)
+		// l2 the line passing Qacc[k] and -π²(Q)
+		l2 = pr.lineCompute(Qacc[k], Q2)
 		// line evaluation at P[k]
-		l2[k].R0 = *pr.MulByElement(&l2[k].R0, xOverY[k])
-		l2[k].R1 = *pr.MulByElement(&l2[k].R1, yInv[k])
+		l2.R0 = *pr.MulByElement(&l2.R0, xOverY[k])
+		l2.R1 = *pr.MulByElement(&l2.R1, yInv[k])
 
 		// ℓ × ℓ
-		prodLines = *pr.Mul034By034(&l1[k].R0, &l1[k].R1, &l2[k].R0, &l2[k].R1)
+		prodLines = *pr.Mul034By034(&l1.R0, &l1.R1, &l2.R0, &l2.R1)
 		// (ℓ × ℓ) × res
 		res = pr.MulBy01234(res, &prodLines)
 

@ivokub
Copy link
Collaborator

ivokub commented Jun 23, 2023

Suggested edit:

diff --git a/std/algebra/emulated/sw_bn254/pairing.go b/std/algebra/emulated/sw_bn254/pairing.go
index e1f523d8..8198bbc1 100644
--- a/std/algebra/emulated/sw_bn254/pairing.go
+++ b/std/algebra/emulated/sw_bn254/pairing.go
@@ -429,6 +429,7 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 		res = pr.MulBy01234(res, &prodLines)
 	}
 
+	l1s := make([]*lineEvaluation, n)
 	for i := 62; i >= 0; i-- {
 		// mutualize the square among n Miller loops
 		// (∏ᵢfᵢ)²
@@ -440,11 +441,11 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 			// precompute lines
 			for k := 0; k < n; k++ {
 				// Qacc[k] ← 2Qacc[k] and l1 the tangent ℓ passing 2Qacc[k]
-				Qacc[k], l1[k] = pr.doubleStep(Qacc[k])
+				Qacc[k], l1s[k] = pr.doubleStep(Qacc[k])
 
 				// line evaluation at P[k]
-				l1[k].R0 = *pr.MulByElement(&l1[k].R0, xOverY[k])
-				l1[k].R1 = *pr.MulByElement(&l1[k].R1, yInv[k])
+				l1s[k].R0 = *pr.MulByElement(&l1s[k].R0, xOverY[k])
+				l1s[k].R1 = *pr.MulByElement(&l1s[k].R1, yInv[k])
 
 			}
 
@@ -452,14 +453,14 @@ func (pr Pairing) MillerLoop(P []*G1Affine, Q []*G2Affine) (*GTEl, error) {
 			// works for n=1 as well
 			if n%2 != 0 {
 				// ℓ × res
-				res = pr.MulBy034(res, &l1[n-1].R0, &l1[n-1].R1)
+				res = pr.MulBy034(res, &l1s[n-1].R0, &l1s[n-1].R1)
 
 			}
 
 			// mul lines 2-by-2
 			for k := 1; k < n; k += 2 {
 				// ℓ × ℓ
-				prodLines = *pr.Mul034By034(&l1[k].R0, &l1[k].R1, &l1[k-1].R0, &l1[k-1].R1)
+				prodLines = *pr.Mul034By034(&l1s[k].R0, &l1s[k].R1, &l1s[k-1].R0, &l1s[k-1].R1)
 				// (ℓ × ℓ) × res
 				res = pr.MulBy01234(res, &prodLines)
 

Copy link
Collaborator

@ivokub ivokub left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general looks good and makes sense.

I suggested some edits:

  • in the main MillerLoop method we actually only use the whole slice in one loop. I kept the current implementation everywhere else, as it is easier to review.
  • in the test I used a loop instead of a switch over the different cases.
  • removed fmt.Println in test.

Of these changes, I think only removing fmt.Println is essential. For the rest, see if they make sense.

@yelhousni yelhousni merged commit bb5a773 into develop Jul 4, 2023
5 checks passed
@yelhousni yelhousni deleted the perf/emulated-pairing branch July 4, 2023 12:30
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

2 participants