Skip to content

Commit

Permalink
Add strcase package
Browse files Browse the repository at this point in the history
- `ToPascalCase`, `ToCamelCase`, and `ToSnakeCase` will transform any
  input to that form.
- Support for unicode runes
- Support for all-uppercase initialisms, as mandated by the Go naming convention.
- Expose `IsInitialism`
- Emphasis on reducing allocations for memory efficiency.
  • Loading branch information
lavoiesl committed Jan 26, 2021
1 parent bf9e870 commit 72f07d1
Show file tree
Hide file tree
Showing 4 changed files with 431 additions and 0 deletions.
120 changes: 120 additions & 0 deletions strcase/id.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package strcase

import (
"math"
"strings"
"unicode"
)

// ToPascalCase converts input to PascalCase.
//
// The input is split into parts by splitJoin; every part, including the
// first one, is capitalized and the parts are joined without a separator.
// For example "aa_bbb" becomes "AaBbb" and "aa_id" becomes "AaID".
func ToPascalCase(input string) string {
	const (
		firstUpper  = 0 // capitalize starting with the very first part
		noSeparator = 0 // the zero rune means "join parts directly"
	)
	return splitJoin(input, firstUpper, noSeparator)
}

// ToCamelCase converts input to camelCase.
//
// The input is split into parts by splitJoin; the first part is left in
// lower case, every following part is capitalized, and the parts are joined
// without a separator. For example "aa_bbb" becomes "aaBbb" and "url_id"
// becomes "urlID".
func ToCamelCase(input string) string {
	const (
		firstUpper  = 1 // part 0 stays lower case, capitalization starts at part 1
		noSeparator = 0 // the zero rune means "join parts directly"
	)
	return splitJoin(input, firstUpper, noSeparator)
}

// ToSnakeCase converts input to snake_case.
//
// The input is split into parts by splitJoin; no part is ever capitalized
// and the parts are joined with '_'. For example "fooBar" becomes "foo_bar"
// and "url10" becomes "url_10".
func ToSnakeCase(input string) string {
	// intSize is the width of int in bits (32 or 64) on the target platform.
	const intSize = 32 << (^uint(0) >> 63)
	// neverUpper is the largest value an int can hold, so no part index ever
	// reaches it. Passing math.MaxInt64 directly does not compile on targets
	// where int is 32 bits wide; shifting it down to the platform's int size
	// yields MaxInt64 on 64-bit targets and MaxInt32 on 32-bit ones.
	const neverUpper = math.MaxInt64 >> (64 - intSize)
	return splitJoin(input, neverUpper, '_')
}

// allocateBuilder returns an empty strings.Builder whose capacity is
// pre-grown for converting input.
//
// Without a separator (separator == 0) the builder is grown by len(input)
// bytes. With a separator it is grown by roughly 25% more, as a guess at how
// many separator runes will be inserted. The guess does not have to be
// exact; an underestimate only costs a few extra allocations while writing:
//
//	"fooBarBaz" (9 bytes) -> "foo_bar_baz" (11 bytes), 9*5/4 = 11: exact
//	"fooId"     (5 bytes) -> "foo_id"      (6 bytes),  5*5/4 = 6:  exact
//	"aBCD"      (4 bytes) -> "a_b_c_d"     (7 bytes),  4*5/4 = 5:  too small
func allocateBuilder(input string, separator rune) *strings.Builder {
	builder := new(strings.Builder)

	size := len(input)
	if separator != 0 {
		size = size * 5 / 4
	}
	builder.Grow(size)

	return builder
}

// splitJoin splits input into parts, normalizes each part and joins them
// back together.
//
// A new part starts:
//   - at an upper-case rune that does not follow another upper-case rune,
//   - when switching between letters and numbers, and
//   - after any rune that is neither a letter nor a number; such runes are
//     dropped from the output.
//
// Every part is lower-cased. Parts whose zero-based index is >= firstUpper
// are then passed to pascalPart. If separator is non-zero it is written
// between consecutive parts.
func splitJoin(input string, firstUpper int, separator rune) string {
	out := allocateBuilder(input, separator)

	var (
		word     []rune       // runes of the part currently being collected
		emitted  int          // number of parts already written to out
		previous runeCategory // category of the last rune appended to word
	)

	// emit writes the collected part, if any, to out and resets the state
	// so that the next rune starts a fresh part.
	emit := func() {
		if len(word) == 0 {
			// Nothing collected since the previous emit.
			return
		}
		if emitted > 0 && separator != 0 {
			out.WriteRune(separator)
		}
		if firstUpper <= emitted {
			pascalPart(word)
		}
		for i := range word {
			out.WriteRune(word[i])
		}
		emitted++
		previous = unknown
		word = word[:0] // drop the content but keep the allocation
	}

	for _, r := range input {
		kind := category(r)

		if kind == upper {
			// A run of upper-case runes stays together; anything else
			// before an upper-case rune ends the current part.
			if previous != upper {
				emit()
			}
			previous = kind
			word = append(word, unicode.ToLower(r))
		} else if kind == lower || kind == number {
			// Letters rank above number, so this detects a switch between
			// "letter" and "not a letter" in either direction.
			wasLetter := previous > number
			isLetter := kind > number
			if wasLetter != isLetter {
				emit()
			}
			previous = kind
			word = append(word, r)
		} else {
			// Separator rune: ends the current part and is discarded.
			emit()
		}
	}
	emit()

	return out.String()
}

// pascalPart capitalizes part in place.
//
// If part is a known initialism (see isInitialism) every rune is converted
// to upper case; otherwise only the first rune is. An empty part is left
// untouched instead of panicking on the first-rune access.
func pascalPart(part []rune) {
	if len(part) == 0 {
		return
	}
	if !isInitialism(part) {
		part[0] = unicode.ToUpper(part[0])
		return
	}
	for i := range part {
		part[i] = unicode.ToUpper(part[i])
	}
}

// runeCategory is the coarse classification of a rune used when splitting
// an identifier into parts.
type runeCategory int

// The order of these constants is significant: splitJoin compares
// categories with ">" against number to tell letters (lower, upper) apart
// from everything else, so the letter categories must come after number.
const (
	unknown = runeCategory(iota) // neither a letter nor a number; treated as a separator
	number                       // numeric rune
	lower                        // lower-case letter
	upper                        // upper-case letter
)

// category classifies r as lower, upper or number using the unicode
// package's predicates, checked in that order. Any rune matching none of
// them is reported as unknown.
func category(r rune) runeCategory {
	if unicode.IsLower(r) {
		return lower
	}
	if unicode.IsUpper(r) {
		return upper
	}
	if unicode.IsNumber(r) {
		return number
	}
	return unknown
}
191 changes: 191 additions & 0 deletions strcase/id_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
package strcase

import (
"fmt"
"strings"
"testing"

"github.com/stretchr/testify/require"
)

// Benchmark_splitJoin runs ToCamelCase and ToSnakeCase next to a plain
// strings.Split + strings.Join baseline. Each input consists of p parts of
// l letters joined by underscores, and the sub-benchmarks are named
// <name>_l<l>_p<p>.
//
// Reference results:
//
// splitjoin_l1_p1 38.1 ns/op 16 B/op 1 allocs/op
// IDToCamelCase_l1_p1 88.6 ns/op 48 B/op 3 allocs/op
// IDToSnakeCase_l1_p1 87.7 ns/op 48 B/op 3 allocs/op
//
// splitjoin_l1_p10 253 ns/op 176 B/op 2 allocs/op
// IDToCamelCase_l1_p10 421 ns/op 72 B/op 3 allocs/op
// IDToSnakeCase_l1_p10 269 ns/op 72 B/op 3 allocs/op
//
// splitjoin_l1_p100 2137 ns/op 1904 B/op 2 allocs/op
// IDToCamelCase_l1_p100 3503 ns/op 248 B/op 3 allocs/op
// IDToSnakeCase_l1_p100 1879 ns/op 296 B/op 3 allocs/op
//
// splitjoin_l10_p1 38.0 ns/op 16 B/op 1 allocs/op
// IDToCamelCase_l10_p1 247 ns/op 168 B/op 6 allocs/op
// IDToSnakeCase_l10_p1 248 ns/op 168 B/op 6 allocs/op
//
// splitjoin_l10_p10 278 ns/op 272 B/op 2 allocs/op
// IDToCamelCase_l10_p10 1140 ns/op 264 B/op 6 allocs/op
// IDToSnakeCase_l10_p10 979 ns/op 296 B/op 6 allocs/op
//
// splitjoin_l10_p100 2267 ns/op 2816 B/op 2 allocs/op
// IDToCamelCase_l10_p100 9538 ns/op 1304 B/op 6 allocs/op
// IDToSnakeCase_l10_p100 8147 ns/op 1560 B/op 6 allocs/op
//
// splitjoin_l100_p1 41.1 ns/op 16 B/op 1 allocs/op
// IDToCamelCase_l100_p1 1114 ns/op 1160 B/op 9 allocs/op
// IDToSnakeCase_l100_p1 1104 ns/op 1176 B/op 9 allocs/op
//
// splitjoin_l100_p10 446 ns/op 1184 B/op 2 allocs/op
// IDToCamelCase_l100_p10 7692 ns/op 2072 B/op 9 allocs/op
// IDToSnakeCase_l100_p10 7589 ns/op 2328 B/op 9 allocs/op
//
// splitjoin_l100_p100 3877 ns/op 12032 B/op 2 allocs/op
// IDToCamelCase_l100_p100 72671 ns/op 11288 B/op 9 allocs/op
// IDToSnakeCase_l100_p100 71673 ns/op 14616 B/op 9 allocs/op
func Benchmark_splitJoin(b *testing.B) {
	sizes := []int{1, 10, 100}

	for _, partLen := range sizes {
		word := strings.Repeat("a", partLen)

		for _, partCount := range sizes {
			// partCount copies of word, joined by underscores.
			input := word + strings.Repeat("_"+word, partCount-1)

			// Baseline: split on the separator and join the parts again.
			baseline := fmt.Sprintf("splitjoin_l%d_p%d", partLen, partCount)
			b.Run(baseline, func(b *testing.B) {
				for n := b.N; n > 0; n-- {
					strings.Join(strings.Split(input, "_"), "")
				}
			})

			camel := fmt.Sprintf("IDToCamelCase_l%d_p%d", partLen, partCount)
			b.Run(camel, func(b *testing.B) {
				for n := b.N; n > 0; n-- {
					ToCamelCase(input)
				}
			})

			snake := fmt.Sprintf("IDToSnakeCase_l%d_p%d", partLen, partCount)
			b.Run(snake, func(b *testing.B) {
				for n := b.N; n > 0; n-- {
					ToSnakeCase(input)
				}
			})
		}
	}
}

// Benchmark_category measures category on small groups of runes, one
// sub-benchmark per group.
//
// Reference results:
//
// lower 5.03 ns/op 0 B/op 0 allocs/op
// upper 5.81 ns/op 0 B/op 0 allocs/op
// number 6.59 ns/op 0 B/op 0 allocs/op
// symbol 6.58 ns/op 0 B/op 0 allocs/op
// 16_bits 153 ns/op 0 B/op 0 allocs/op
// 32_bits 160 ns/op 0 B/op 0 allocs/op
func Benchmark_category(b *testing.B) {
	groups := map[string][]rune{
		"lower":   {'a', 'b'},
		"upper":   {'A', 'B'},
		"number":  {'0', '1'},
		"symbol":  {'_', ' '},
		"16 bits": {'™', '∞', '•', 'Ω'},
		"32 bits": {'𠁂', '𠁄', '𠁔', '𠁑'},
	}

	for label, sample := range groups {
		b.Run(label, func(b *testing.B) {
			for n := b.N; n > 0; n-- {
				for j := range sample {
					category(sample[j])
				}
			}
		})
	}
}

// Test_splitJoin checks ToPascalCase, ToCamelCase and ToSnakeCase against a
// table of inputs. Each case runs as a sub-test named after its input.
func Test_splitJoin(t *testing.T) {
	type conversion struct {
		input  string
		pascal string
		camel  string
		snake  string
	}

	cases := []conversion{
		// Empty input yields empty output in every form.
		{},
		// Single runes.
		{input: "a", pascal: "A", camel: "a", snake: "a"},
		{input: "A", pascal: "A", camel: "a", snake: "a"},
		// Separators, including leading, trailing and repeated ones.
		{input: "a_a", pascal: "AA", camel: "aA", snake: "a_a"},
		{input: "__a___a_", pascal: "AA", camel: "aA", snake: "a_a"},
		{input: "aa_bbb", pascal: "AaBbb", camel: "aaBbb", snake: "aa_bbb"},
		{input: "aa_id", pascal: "AaID", camel: "aaID", snake: "aa_id"},
		// Case boundaries.
		{input: "fooBar", pascal: "FooBar", camel: "fooBar", snake: "foo_bar"},
		{input: "FooBAR", pascal: "FooBar", camel: "fooBar", snake: "foo_bar"},
		// Initialisms.
		{input: "fooUrl", pascal: "FooURL", camel: "fooURL", snake: "foo_url"},
		{input: "fooURL", pascal: "FooURL", camel: "fooURL", snake: "foo_url"},
		{input: "url10", pascal: "URL10", camel: "url10", snake: "url_10"},
		{input: "url_id", pascal: "URLID", camel: "urlID", snake: "url_id"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.input, func(t *testing.T) {
			require.Equal(t, tc.pascal, ToPascalCase(tc.input))
			require.Equal(t, tc.camel, ToCamelCase(tc.input))
			require.Equal(t, tc.snake, ToSnakeCase(tc.input))
		})
	}
}
83 changes: 83 additions & 0 deletions strcase/initialism.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package strcase

import "sort"

// commonInitialisms holds the known initialisms as lower-case rune slices,
// sorted in ascending order so that isInitialism can binary-search it.
// It is populated once by init.
var commonInitialisms [][]rune

// init fills commonInitialisms from a hard-coded word list.
func init() {
	// Go's convention is to write acronyms in all caps, so a few of the
	// common ones are hard-coded here.
	// Taken from https://github.com/golang/lint/blob/83fdc39ff7b56453e3793356bcff3070b9b96445/lint.go#L770-L809
	words := []string{
		"acl",
		"api",
		"ascii",
		"cpu",
		"css",
		"dns",
		"eof",
		"guid",
		"html",
		"http",
		"https",
		"id",
		"ip",
		"json",
		"lhs",
		"qps",
		"ram",
		"rhs",
		"rpc",
		"sla",
		"smtp",
		"sql",
		"ssh",
		"tcp",
		"tls",
		"ttl",
		"udp",
		"ui",
		"uid",
		"uuid",
		"uri",
		"url",
		"utf8",
		"vm",
		"xml",
		"xmpp",
		"xsrf",
		"xss",
	}

	// The literal above is not required to be in order; sorting here is
	// what establishes the order the lookup relies on.
	sort.Strings(words)

	commonInitialisms = make([][]rune, len(words))
	for i, word := range words {
		commonInitialisms[i] = []rune(word)
	}
}

// IsInitialism reports whether part is found in the table of common
// initialisms. The table holds lower-case entries and runes are compared
// as-is, so part is expected in lower case.
func IsInitialism(part string) bool {
	runes := []rune(part)
	return isInitialism(runes)
}

// isInitialism reports whether part is exactly equal to one of the entries
// of commonInitialisms.
//
// It is a binary search adapted from sort.Search that compares rune slices
// directly instead of going through a comparison callback. It relies on
// commonInitialisms being sorted in ascending order.
func isInitialism(part []rune) bool {
	lo, hi := 0, len(commonInitialisms)

search:
	for lo < hi {
		mid := int(uint(lo+hi) >> 1) // avoid overflow when computing mid
		// lo ≤ mid < hi
		candidate := commonInitialisms[mid]

		for k, r := range candidate {
			switch {
			case k >= len(part) || part[k] < r:
				// part is a strict prefix of candidate, or differs with a
				// smaller rune: it sorts before candidate.
				hi = mid
				continue search
			case part[k] > r:
				lo = mid + 1
				continue search
			}
		}

		// Every rune of candidate matched. If part has more runes,
		// candidate is only a prefix of it (e.g. "id" vs "identity"), which
		// is not a match: part sorts after candidate, so keep searching.
		if len(part) > len(candidate) {
			lo = mid + 1
			continue
		}
		return true
	}
	return false
}

0 comments on commit 72f07d1

Please sign in to comment.