-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- `ToPascalCase`, `ToCamelCase`, and `ToSnakeCase` transform any input into the corresponding form. - Support for Unicode runes. - Support for all-uppercase initialisms, as mandated by the Go naming convention. - Expose `IsInitialism`. - Emphasis on reducing allocations for memory efficiency.
- Loading branch information
Showing
4 changed files
with
431 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
package strcase | ||
|
||
import ( | ||
"math" | ||
"strings" | ||
"unicode" | ||
) | ||
|
||
func ToPascalCase(input string) string { | ||
return splitJoin(input, 0, 0) | ||
} | ||
|
||
func ToCamelCase(input string) string { | ||
return splitJoin(input, 1, 0) | ||
} | ||
|
||
func ToSnakeCase(input string) string { | ||
return splitJoin(input, math.MaxInt64, '_') | ||
} | ||
|
||
// allocateBuilder returns a strings.Builder whose buffer has been pre-grown
// for the expected size of the converted output.
//
// Without a separator (separator == 0) the capacity is len(input) bytes.
// With a separator, roughly 25% extra is reserved for the separators that
// will be inserted between parts.
func allocateBuilder(input string, separator rune) *strings.Builder {
	capacity := len(input)
	if separator != 0 {
		// Heuristic: add about 25% for separators. An imperfect estimate is
		// not a problem, it only costs a few additional allocations.
		// Examples (input bytes -> output bytes):
		//   "fooBarBaz" -> "foo_bar_baz":  9 -> 11, 9 * 5 / 4 = 11
		//   "fooId"     -> "foo_id":       5 -> 6,  5 * 5 / 4 = 6
		//   four one-rune parts, e.g. "a_b_c_d" built from 4 letters:
		//                                  4 -> 7,  4 * 5 / 4 = 5 (one extra allocation)
		capacity = capacity * 5 / 4
	}

	b := new(strings.Builder)
	b.Grow(capacity)
	return b
}
|
||
func splitJoin(input string, firstUpper int, separator rune) string { | ||
b := allocateBuilder(input, separator) | ||
var buf []rune | ||
var currentPartIndex int | ||
var lastCategory runeCategory | ||
|
||
// Flush the buffer as a part | ||
flush := func() { | ||
if len(buf) == 0 { | ||
// Nothing was added since last flush | ||
return | ||
} | ||
if separator != 0 && currentPartIndex > 0 { | ||
b.WriteRune(separator) | ||
} | ||
if currentPartIndex >= firstUpper { | ||
pascalPart(buf) | ||
} | ||
for _, r := range buf { | ||
b.WriteRune(r) | ||
} | ||
currentPartIndex++ | ||
lastCategory = unknown | ||
buf = buf[0:0] // Clear buffer, but keep current allocation | ||
} | ||
|
||
for _, r := range input { | ||
switch cat := category(r); cat { | ||
case upper: | ||
if lastCategory != upper { | ||
flush() | ||
} | ||
lastCategory = cat | ||
buf = append(buf, unicode.ToLower(r)) | ||
case lower, number: | ||
if (lastCategory > number) != (cat > number) { | ||
flush() | ||
} | ||
lastCategory = cat | ||
buf = append(buf, r) | ||
default: | ||
// separator | ||
flush() | ||
} | ||
} | ||
flush() | ||
|
||
return b.String() | ||
} | ||
|
||
// Convert to uppercase if initialism. | ||
// Convert first rune to uppercase otherwise. | ||
func pascalPart(part []rune) { | ||
if isInitialism(part) { | ||
for ri, r := range part { | ||
part[ri] = unicode.ToUpper(r) | ||
} | ||
} else { | ||
part[0] = unicode.ToUpper(part[0]) | ||
} | ||
} | ||
|
||
// runeCategory classifies a rune for the purpose of splitting words.
type runeCategory int

// The order of these constants matters: splitJoin uses "> number" to tell
// letters (lower, upper) apart from numbers and unknown runes, so both letter
// categories must stay after number.
const (
	unknown runeCategory = iota // anything that is not a cased letter or a number
	number
	lower
	upper
)

// category returns the runeCategory of r: lower for lower-case letters, upper
// for upper-case letters, number for Unicode numbers, and unknown for every
// other rune (punctuation, spaces, symbols, letters without case, ...).
func category(r rune) runeCategory {
	switch {
	case unicode.IsLower(r):
		return lower
	case unicode.IsUpper(r):
		return upper
	case unicode.IsNumber(r):
		return number
	default:
		return unknown
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
package strcase | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// splitjoin_l1_p1 38.1 ns/op 16 B/op 1 allocs/op | ||
// IDToCamelCase_l1_p1 88.6 ns/op 48 B/op 3 allocs/op | ||
// IDToSnakeCase_l1_p1 87.7 ns/op 48 B/op 3 allocs/op | ||
// | ||
// splitjoin_l1_p10 253 ns/op 176 B/op 2 allocs/op | ||
// IDToCamelCase_l1_p10 421 ns/op 72 B/op 3 allocs/op | ||
// IDToSnakeCase_l1_p10 269 ns/op 72 B/op 3 allocs/op | ||
// | ||
// splitjoin_l1_p100 2137 ns/op 1904 B/op 2 allocs/op | ||
// IDToCamelCase_l1_p100 3503 ns/op 248 B/op 3 allocs/op | ||
// IDToSnakeCase_l1_p100 1879 ns/op 296 B/op 3 allocs/op | ||
// | ||
// splitjoin_l10_p1 38.0 ns/op 16 B/op 1 allocs/op | ||
// IDToCamelCase_l10_p1 247 ns/op 168 B/op 6 allocs/op | ||
// IDToSnakeCase_l10_p1 248 ns/op 168 B/op 6 allocs/op | ||
// | ||
// splitjoin_l10_p10 278 ns/op 272 B/op 2 allocs/op | ||
// IDToCamelCase_l10_p10 1140 ns/op 264 B/op 6 allocs/op | ||
// IDToSnakeCase_l10_p10 979 ns/op 296 B/op 6 allocs/op | ||
// | ||
// splitjoin_l10_p100 2267 ns/op 2816 B/op 2 allocs/op | ||
// IDToCamelCase_l10_p100 9538 ns/op 1304 B/op 6 allocs/op | ||
// IDToSnakeCase_l10_p100 8147 ns/op 1560 B/op 6 allocs/op | ||
// | ||
// splitjoin_l100_p1 41.1 ns/op 16 B/op 1 allocs/op | ||
// IDToCamelCase_l100_p1 1114 ns/op 1160 B/op 9 allocs/op | ||
// IDToSnakeCase_l100_p1 1104 ns/op 1176 B/op 9 allocs/op | ||
// | ||
// splitjoin_l100_p10 446 ns/op 1184 B/op 2 allocs/op | ||
// IDToCamelCase_l100_p10 7692 ns/op 2072 B/op 9 allocs/op | ||
// IDToSnakeCase_l100_p10 7589 ns/op 2328 B/op 9 allocs/op | ||
// | ||
// splitjoin_l100_p100 3877 ns/op 12032 B/op 2 allocs/op | ||
// IDToCamelCase_l100_p100 72671 ns/op 11288 B/op 9 allocs/op | ||
// IDToSnakeCase_l100_p100 71673 ns/op 14616 B/op 9 allocs/op | ||
func Benchmark_splitJoin(b *testing.B) { | ||
for _, length := range []int{1, 10, 100} { | ||
part := strings.Repeat("a", length) | ||
|
||
for _, count := range []int{1, 10, 100} { | ||
input := part + strings.Repeat("_"+part, count-1) | ||
|
||
// Baseline, split and join all parts | ||
b.Run(fmt.Sprintf("splitjoin_l%d_p%d", length, count), func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
strings.Join(strings.Split(input, "_"), "") | ||
} | ||
}) | ||
|
||
b.Run(fmt.Sprintf("IDToCamelCase_l%d_p%d", length, count), func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
ToCamelCase(input) | ||
} | ||
}) | ||
|
||
b.Run(fmt.Sprintf("IDToSnakeCase_l%d_p%d", length, count), func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
ToSnakeCase(input) | ||
} | ||
}) | ||
} | ||
} | ||
} | ||
|
||
// lower 5.03 ns/op 0 B/op 0 allocs/op | ||
// upper 5.81 ns/op 0 B/op 0 allocs/op | ||
// number 6.59 ns/op 0 B/op 0 allocs/op | ||
// symbol 6.58 ns/op 0 B/op 0 allocs/op | ||
// 16_bits 153 ns/op 0 B/op 0 allocs/op | ||
// 32_bits 160 ns/op 0 B/op 0 allocs/op | ||
func Benchmark_category(b *testing.B) { | ||
tests := map[string][]rune{ | ||
"lower": {'a', 'b'}, | ||
"upper": {'A', 'B'}, | ||
"number": {'0', '1'}, | ||
"symbol": {'_', ' '}, | ||
"16 bits": {'™', '∞', '•', 'Ω'}, | ||
"32 bits": {'𠁂', '𠁄', '𠁔', '𠁑'}, | ||
} | ||
for name, runes := range tests { | ||
b.Run(name, func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
for _, r := range runes { | ||
category(r) | ||
} | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func Test_splitJoin(t *testing.T) { | ||
tests := []struct { | ||
input string | ||
camel string | ||
pascal string | ||
snake string | ||
}{ | ||
{ | ||
// everything empty | ||
}, | ||
{ | ||
input: "a", | ||
pascal: "A", | ||
camel: "a", | ||
snake: "a", | ||
}, | ||
{ | ||
input: "A", | ||
pascal: "A", | ||
camel: "a", | ||
snake: "a", | ||
}, | ||
{ | ||
input: "a_a", | ||
pascal: "AA", | ||
camel: "aA", | ||
snake: "a_a", | ||
}, | ||
{ | ||
input: "__a___a_", | ||
pascal: "AA", | ||
camel: "aA", | ||
snake: "a_a", | ||
}, | ||
{ | ||
input: "aa_bbb", | ||
pascal: "AaBbb", | ||
camel: "aaBbb", | ||
snake: "aa_bbb", | ||
}, | ||
{ | ||
input: "aa_id", | ||
pascal: "AaID", | ||
camel: "aaID", | ||
snake: "aa_id", | ||
}, | ||
{ | ||
input: "fooBar", | ||
pascal: "FooBar", | ||
camel: "fooBar", | ||
snake: "foo_bar", | ||
}, | ||
{ | ||
input: "FooBAR", | ||
pascal: "FooBar", | ||
camel: "fooBar", | ||
snake: "foo_bar", | ||
}, | ||
{ | ||
input: "fooUrl", | ||
pascal: "FooURL", | ||
camel: "fooURL", | ||
snake: "foo_url", | ||
}, | ||
{ | ||
input: "fooURL", | ||
pascal: "FooURL", | ||
camel: "fooURL", | ||
snake: "foo_url", | ||
}, | ||
{ | ||
input: "url10", | ||
pascal: "URL10", | ||
camel: "url10", | ||
snake: "url_10", | ||
}, | ||
{ | ||
input: "url_id", | ||
pascal: "URLID", | ||
camel: "urlID", | ||
snake: "url_id", | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.input, func(t *testing.T) { | ||
require.Equal(t, tt.pascal, ToPascalCase(tt.input)) | ||
require.Equal(t, tt.camel, ToCamelCase(tt.input)) | ||
require.Equal(t, tt.snake, ToSnakeCase(tt.input)) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
package strcase | ||
|
||
import "sort" | ||
|
||
// commonInitialisms holds the known initialisms as lower-case rune slices,
// sorted in ascending order so that isInitialism can binary-search it.
var commonInitialisms [][]rune

func init() {
	// To follow Go's convention of writing initialisms in all caps, hard-code a few of the common ones.
	// Taken from https://github.com/golang/lint/blob/83fdc39ff7b56453e3793356bcff3070b9b96445/lint.go#L770-L809
	initialisms := []string{
		"acl",
		"api",
		"ascii",
		"cpu",
		"css",
		"dns",
		"eof",
		"guid",
		"html",
		"http",
		"https",
		"id",
		"ip",
		"json",
		"lhs",
		"qps",
		"ram",
		"rhs",
		"rpc",
		"sla",
		"smtp",
		"sql",
		"ssh",
		"tcp",
		"tls",
		"ttl",
		"udp",
		"ui",
		"uid",
		"uuid",
		"uri",
		"url",
		"utf8",
		"vm",
		"xml",
		"xmpp",
		"xsrf",
		"xss",
	}
	// The literal above is not guaranteed to be ordered; the search in
	// isInitialism requires it to be.
	sort.Strings(initialisms)

	commonInitialisms = make([][]rune, 0, len(initialisms))
	for _, initialism := range initialisms {
		commonInitialisms = append(commonInitialisms, []rune(initialism))
	}
}

// IsInitialism reports whether part is one of the known common initialisms
// (such as "id", "url" or "http"). The comparison is exact and case
// sensitive: the table is stored in lower case, so part must be lower case.
func IsInitialism(part string) bool {
	return isInitialism([]rune(part))
}

// isInitialism reports whether part is exactly equal to one of the entries of
// commonInitialisms.
//
// It is a binary search adapted from sort.Search that works directly on rune
// slices, so no string conversion is needed.
func isInitialism(part []rune) bool {
	lo, hi := 0, len(commonInitialisms)
search:
	for lo < hi {
		mid := int(uint(lo+hi) >> 1) // avoid overflow when computing mid
		// lo ≤ mid < hi
		candidate := commonInitialisms[mid]

		for k, r := range candidate {
			switch {
			case k >= len(part) || part[k] < r:
				// part is a strict prefix of candidate, or sorts before it.
				hi = mid
				continue search
			case part[k] > r:
				lo = mid + 1
				continue search
			}
		}
		if len(part) > len(candidate) {
			// candidate is only a strict prefix of part (e.g. "id" vs "idea"),
			// so part sorts after it and is not a match.
			lo = mid + 1
			continue
		}
		return true
	}
	return false
}
Oops, something went wrong.