Skip to content

Commit

Permalink
perf: cache regexp compiles (#213)
Browse files Browse the repository at this point in the history
  • Loading branch information
G-Rath committed Sep 5, 2023
1 parent 6eea933 commit 48310bb
Show file tree
Hide file tree
Showing 13 changed files with 66 additions and 47 deletions.
18 changes: 18 additions & 0 deletions internal/cachedregexp/regex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package cachedregexp

import (
"regexp"
"sync"
)

//nolint:gochecknoglobals // this is the whole point of being a cache
var cache sync.Map

func MustCompile(exp string) *regexp.Regexp {
compiled, ok := cache.Load(exp)
if !ok {
compiled, _ = cache.LoadOrStore(exp, regexp.MustCompile(exp))
}

return compiled.(*regexp.Regexp)
}
17 changes: 9 additions & 8 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"bytes"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"github.com/google/go-cmp/cmp"
"os"
"path/filepath"
Expand All @@ -18,17 +19,17 @@ func dedent(t *testing.T, str string) string {
str = strings.ReplaceAll(str, "\t", " ")

// 1. remove trailing whitespace
re := regexp.MustCompile(`\r?\n([\t ]*)$`)
re := cachedregexp.MustCompile(`\r?\n([\t ]*)$`)
str = re.ReplaceAllString(str, "")

// 2. if any of the lines are not indented, return as we're already dedent-ed
re = regexp.MustCompile(`(^|\r?\n)[^\t \n]`)
re = cachedregexp.MustCompile(`(^|\r?\n)[^\t \n]`)
if re.MatchString(str) {
return str
}

// 3. find all line breaks to determine the highest common indentation level
re = regexp.MustCompile(`\n[\t ]+`)
re = cachedregexp.MustCompile(`\n[\t ]+`)
matches := re.FindAllString(str, -1)

// 4. remove the common indentation from all strings
Expand All @@ -41,12 +42,12 @@ func dedent(t *testing.T, str string) string {
}
}

re := regexp.MustCompile(`\n[\t ]{` + fmt.Sprint(size) + `}`)
re := cachedregexp.MustCompile(`\n[\t ]{` + fmt.Sprint(size) + `}`)
str = re.ReplaceAllString(str, "\n")
}

// 5. Remove leading whitespace.
re = regexp.MustCompile(`^\r?\n`)
re = cachedregexp.MustCompile(`^\r?\n`)
str = re.ReplaceAllString(str, "")

return str
Expand All @@ -60,7 +61,7 @@ func areEqual(t *testing.T, actual, expect string) bool {
expect = regexp.QuoteMeta(expect)
expect = strings.ReplaceAll(expect, "%%", ".+")

re := regexp.MustCompile(`^` + expect + `$`)
re := cachedregexp.MustCompile(`^` + expect + `$`)

return re.MatchString(actual)
}
Expand All @@ -85,7 +86,7 @@ func normalizeFilePaths(output string) string {
// the number of vulnerabilities and the time that the database was last updated)
// in the output with %% wildcards, in order to reduce the noise of the cmp diff
func wildcardDatabaseStats(str string) string {
re := regexp.MustCompile(`(\w+) \(\d+ vulnerabilities, including withdrawn - last updated \w{3}, \d\d \w{3} \d{4} [012]\d:\d\d:\d\d GMT\)`)
re := cachedregexp.MustCompile(`(\w+) \(\d+ vulnerabilities, including withdrawn - last updated \w{3}, \d\d \w{3} \d{4} [012]\d:\d\d:\d\d GMT\)`)

return re.ReplaceAllString(str, "$1 (%% vulnerabilities, including withdrawn - last updated %%)")
}
Expand Down Expand Up @@ -1384,7 +1385,7 @@ func TestRun_EndToEnd(t *testing.T) {
}

tests := make([]cliTestCase, 0, len(files)/2)
re := regexp.MustCompile(`\d+-(.*)`)
re := cachedregexp.MustCompile(`\d+-(.*)`)

for _, f := range files {
if strings.HasSuffix(f.Name(), ".out.txt") {
Expand Down
4 changes: 2 additions & 2 deletions pkg/database/osv.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ import (
"encoding/json"
"fmt"
"github.com/g-rath/osv-detector/internal"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"github.com/g-rath/osv-detector/pkg/lockfile"
"github.com/g-rath/osv-detector/pkg/semantic"
"os"
"regexp"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -43,7 +43,7 @@ func (p Package) NormalizedName() string {
}

// per https://www.python.org/dev/peps/pep-0503/#normalized-names
name := regexp.MustCompile(`[-_.]+`).ReplaceAllString(p.Name, "-")
name := cachedregexp.MustCompile(`[-_.]+`).ReplaceAllString(p.Name, "-")

return strings.ToLower(name)
}
Expand Down
12 changes: 6 additions & 6 deletions pkg/lockfile/parse-gemfile-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package lockfile

import (
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"log"
"os"
"regexp"
"strings"
)

Expand Down Expand Up @@ -55,8 +55,8 @@ func (parser *gemfileLockfileParser) addDependency(name string, version string)
}

func (parser *gemfileLockfileParser) parseSpec(line string) {
// nameVersionReg := regexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
nameVersionReg := regexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
// nameVersionReg := cachedregexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
nameVersionReg := cachedregexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)

results := nameVersionReg.FindStringSubmatch(line)

Expand All @@ -82,7 +82,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) {
}

// OPTIONS = /^ ([a-z]+): (.*)$/i.freeze
optionsRegexp := regexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`)
optionsRegexp := cachedregexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`)

// todo: support
options := optionsRegexp.FindStringSubmatch(line)
Expand All @@ -105,7 +105,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) {
}

func isNotIndented(line string) bool {
re := regexp.MustCompile(`^\S`)
re := cachedregexp.MustCompile(`^\S`)

return re.MatchString(line)
}
Expand All @@ -127,7 +127,7 @@ func (parser *gemfileLockfileParser) parseLineBasedOnState(line string) {
}

func (parser *gemfileLockfileParser) parse(contents string) {
lineMatcher := regexp.MustCompile(`(?:\r?\n)+`)
lineMatcher := cachedregexp.MustCompile(`(?:\r?\n)+`)

lines := lineMatcher.Split(contents, -1)

Expand Down
6 changes: 3 additions & 3 deletions pkg/lockfile/parse-maven-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ package lockfile
import (
"encoding/xml"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"os"
"regexp"
)

type MavenLockDependency struct {
Expand All @@ -15,7 +15,7 @@ type MavenLockDependency struct {
}

func (mld MavenLockDependency) parseResolvedVersion(version string) string {
versionRequirementReg := regexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)
versionRequirementReg := cachedregexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)

results := versionRequirementReg.FindStringSubmatch(version)

Expand All @@ -27,7 +27,7 @@ func (mld MavenLockDependency) parseResolvedVersion(version string) string {
}

func (mld MavenLockDependency) resolveVersionValue(lockfile MavenLockFile) string {
interpolationReg := regexp.MustCompile(`\${(.+)}`)
interpolationReg := cachedregexp.MustCompile(`\${(.+)}`)

results := interpolationReg.FindStringSubmatch(mld.Version)

Expand Down
4 changes: 2 additions & 2 deletions pkg/lockfile/parse-mix-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ package lockfile
import (
"bufio"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"os"
"regexp"
"strings"
)

Expand All @@ -17,7 +17,7 @@ func ParseMixLock(pathToLockfile string) ([]PackageDetails, error) {
}
defer file.Close()

re := regexp.MustCompile(`^ +"(\w+)": \{.+,$`)
re := cachedregexp.MustCompile(`^ +"(\w+)": \{.+,$`)

scanner := bufio.NewScanner(file)

Expand Down
8 changes: 4 additions & 4 deletions pkg/lockfile/parse-pnpm-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package lockfile

import (
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"gopkg.in/yaml.v2"
"os"
"regexp"
"strconv"
"strings"
)
Expand Down Expand Up @@ -54,7 +54,7 @@ func (l *PnpmLockfile) UnmarshalYAML(unmarshal func(interface{}) error) error {
const PnpmEcosystem = NpmEcosystem

func startsWithNumber(str string) bool {
matcher := regexp.MustCompile(`^\d`)
matcher := cachedregexp.MustCompile(`^\d`)

return matcher.MatchString(str)
}
Expand Down Expand Up @@ -105,7 +105,7 @@ func extractPnpmPackageNameAndVersion(dependencyPath string) (string, string) {

func parseNameAtVersion(value string) (name string, version string) {
// look for pattern "name@version", where name is allowed to contain zero or more "@"
matches := regexp.MustCompile(`^(.+)@([\w.-]+)(?:\(|$)`).FindStringSubmatch(value)
matches := cachedregexp.MustCompile(`^(.+)@([\w.-]+)(?:\(|$)`).FindStringSubmatch(value)

if len(matches) != 3 {
return name, ""
Expand Down Expand Up @@ -139,7 +139,7 @@ func parsePnpmLock(lockfile PnpmLockfile) []PackageDetails {
commit := pkg.Resolution.Commit

if strings.HasPrefix(pkg.Resolution.Tarball, "https://codeload.github.com") {
re := regexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
re := cachedregexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
matched := re.FindStringSubmatch(pkg.Resolution.Tarball)

if matched != nil {
Expand Down
8 changes: 4 additions & 4 deletions pkg/lockfile/parse-requirements-txt.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package lockfile
import (
"bufio"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"os"
"path/filepath"
"regexp"
"strings"
)

Expand Down Expand Up @@ -67,15 +67,15 @@ func parseLine(line string) PackageDetails {
// than false negatives, and can be dealt with when/if it actually happens.
func normalizedRequirementName(name string) string {
// per https://www.python.org/dev/peps/pep-0503/#normalized-names
name = regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")
name = cachedregexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")
name = strings.ToLower(name)
name = strings.Split(name, "[")[0]

return name
}

func removeComments(line string) string {
var re = regexp.MustCompile(`(^|\s+)#.*$`)
var re = cachedregexp.MustCompile(`(^|\s+)#.*$`)

return strings.TrimSpace(re.ReplaceAllString(line, ""))
}
Expand All @@ -95,7 +95,7 @@ func isNotRequirementLine(line string) bool {
func isLineContinuation(line string) bool {
// checks that the line ends with an odd number of backslashes,
// meaning the last one isn't escaped
var re = regexp.MustCompile(`([^\\]|^)(\\{2})*\\$`)
var re = cachedregexp.MustCompile(`([^\\]|^)(\\{2})*\\$`)

return re.MatchString(line)
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/lockfile/parse-yarn-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package lockfile
import (
"bufio"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"net/url"
"os"
"regexp"
"strings"
)

Expand Down Expand Up @@ -63,7 +63,7 @@ func extractYarnPackageName(str string) string {
}

func determineYarnPackageVersion(group []string) string {
re := regexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`)
re := cachedregexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`)

for _, s := range group {
matched := re.FindStringSubmatch(s)
Expand All @@ -78,7 +78,7 @@ func determineYarnPackageVersion(group []string) string {
}

func determineYarnPackageResolution(group []string) string {
re := regexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)
re := cachedregexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)

for _, s := range group {
matched := re.FindStringSubmatch(s)
Expand Down Expand Up @@ -111,7 +111,7 @@ func tryExtractCommit(resolution string) string {
}

for _, matcher := range matchers {
re := regexp.MustCompile(matcher)
re := cachedregexp.MustCompile(matcher)
matched := re.FindStringSubmatch(resolution)

if matched != nil {
Expand Down
6 changes: 3 additions & 3 deletions pkg/semantic/version-maven.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package semantic

import (
"fmt"
"regexp"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"sort"
"strings"
)
Expand Down Expand Up @@ -175,11 +175,11 @@ func (mv MavenVersion) lessThan(mw MavenVersion) bool {
// According to Maven's implementation, any non-digit is a "character":
// https://github.com/apache/maven/blob/965aaa53da5c2d814e94a41d37142d0d6830375d/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java#L627
func mavenFindTransitions(token string) (ints []int) {
for _, span := range regexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) {
for _, span := range cachedregexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) {
ints = append(ints, span[0]+1)
}

for _, span := range regexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) {
for _, span := range cachedregexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) {
ints = append(ints, span[0]+1)
}

Expand Down
8 changes: 4 additions & 4 deletions pkg/semantic/version-packagist.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package semantic

import (
"regexp"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"strconv"
"strings"
)
Expand All @@ -15,9 +15,9 @@ func canonicalizePackagistVersion(v string) string {
// the trimming...)
v = strings.TrimPrefix(strings.TrimPrefix(v, "v"), "V")

v = regexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".")
v = regexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2")
v = regexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2")
v = cachedregexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".")
v = cachedregexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2")
v = cachedregexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2")

return v
}
Expand Down

0 comments on commit 48310bb

Please sign in to comment.