Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consider using sqlite3 #2

Closed
stapelberg opened this issue Nov 1, 2017 · 1 comment
Closed

Consider using sqlite3 #2

stapelberg opened this issue Nov 1, 2017 · 1 comment
Labels

Comments

@stapelberg
Copy link
Contributor

Filing this issue to document the results of my tests:

  • Using sqlite3 increases the pk4 binary size by a factor of 2 (6M → 13M)
  • The sqlite3 database version of sources.index is 3x as large (4M .index → 11M .sqlite3)
  • Generating the sqlite3 database takes 2x as long as encoding .index (340ms .index → 715ms .sqlite3)
  • Lookup times depend on the key, but are roughly on par — sometimes .index is faster, but we’re talking <1ms anyway.

So, it seems like the only benefits we would get from sqlite3 are data types (we only store strings, though) and structure (we currently \t-separate fields). I think we should stick with our own format for the time being.

@stapelberg
Copy link
Contributor Author

For the record, here are the patches I used on top of commit 07b3552 for my tests:

@@ -2,20 +2,25 @@ package main
 
 import (
 	"bufio"
+	"database/sql"
 	"fmt"
 	"io"
+	"log"
 	"math"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"sort"
 	"strings"
+	"time"
 
 	"github.com/Debian/pk4/internal/index"
 	"github.com/Debian/pk4/internal/write"
 	"golang.org/x/sync/errgroup"
 	"pault.ag/go/debian/control"
 	"pault.ag/go/debian/version"
+
+	_ "github.com/mattn/go-sqlite3"
 )
 
 func getBinaryIndexFile(filename string) ([]control.BinaryIndex, error) {
@@ -169,11 +174,46 @@ func genSources() error {
 
 	var weg errgroup.Group
 
-	weg.Go(func() error {
+	if err := func() error {
+		start := time.Now()
+		defer func() { log.Printf("sqlite3 took %v", time.Since(start)) }()
+		db, err := sql.Open("sqlite3", filepath.Join(*indexDir, "sources.sqlite3"))
+		if err != nil {
+			return err
+		}
+		defer db.Close()
+		if _, err := db.Exec(`CREATE TABLE idx (key TEXT PRIMARY KEY, value TEXT)`); err != nil {
+			return err
+		}
+		tx, err := db.Begin()
+		if err != nil {
+			return err
+		}
+		stmt, err := tx.Prepare(`INSERT INTO idx (key, value) VALUES (?, ?)`)
+		if err != nil {
+			return err
+		}
+		defer stmt.Close()
+		for key, src := range merged {
+			value := fmt.Sprintf("%s\t%s", src.Package, src.Version.String())
+			if _, err := stmt.Exec(key, value); err != nil {
+				return err
+			}
+		}
+		return tx.Commit()
+	}(); err != nil {
+		return err
+	}
+	if err := func() error {
 		return write.Atomically(filepath.Join(*indexDir, "sources.index"), func(w io.Writer) error {
+			start := time.Now()
+			defer func() { log.Printf("index took %v", time.Since(start)) }()
+
 			return merged.Encode(w)
 		})
-	})
+	}(); err != nil {
+		return err
+	}
 
 	weg.Go(func() error {
 		return write.Atomically(filepath.Join(*indexDir, "completion.bin.txt"), func(w io.Writer) error {
@@ -3,6 +3,7 @@ package main
 import (
 	"bufio"
 	"bytes"
+	"database/sql"
 	"encoding/binary"
 	"errors"
 	"fmt"
@@ -16,6 +17,8 @@ import (
 	"time"
 
 	"github.com/Debian/pk4/internal/index"
+
+	_ "github.com/mattn/go-sqlite3"
 )
 
 var notFound = errors.New("key not found in index")
@@ -137,7 +140,20 @@ func (inv *invocation) lookupDSC(srcpkg, srcversion string) (index.DSC, error) {
 }
 
 func (inv *invocation) lookup(key string) (srcpkg string, srcversion string, _ error) {
-	val, err := lookup(filepath.Join(inv.indexDir, "sources.index"), key)
+	start := time.Now()
+	db, err := sql.Open("sqlite3", "file://"+filepath.Join(inv.indexDir, "sources.sqlite3")+"?immutable=1")
+	if err != nil {
+		return "", "", err
+	}
+	defer db.Close()
+	var val string
+	err = db.QueryRow(`SELECT value FROM idx WHERE key = ?`, key).Scan(&val)
+	dur := time.Since(start)
+	log.Printf("sqlite3 lookup took %v", dur)
+	start = time.Now()
+	val, err = lookup(filepath.Join(inv.indexDir, "sources.index"), key)
+	dur = time.Since(start)
+	log.Printf("index lookup took %v", dur)
 	if err != nil {
 		return "", "", err
 	}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

1 participant