Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add strcase package #86

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions strcase/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package strcase provides functions to convert identifiers between PascalCase, camelCase,
// snake_case, and the dash-separated form used for HTTP header fields.
// ToPascalGoCase and ToCamelGoCase are also provided; they recognize some common initialisms
// and always transform them to uppercase, matching the Go style.
package strcase
149 changes: 149 additions & 0 deletions strcase/id.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package strcase

import (
"math"
"strings"
"unicode"
)

// CaseStrategy selects which parts of a split identifier SplitJoin capitalizes.
// SplitJoin converts the value to an int and capitalizes every part whose
// zero-based index is greater than or equal to it.
type CaseStrategy int

const (
// CaseStrategyTitle capitalizes every part, starting with the first one (index 0).
CaseStrategyTitle CaseStrategy = 0
// CaseStrategySkipFirstPart leaves the first part in lower case and capitalizes the others.
CaseStrategySkipFirstPart CaseStrategy = 1
// CaseStrategyNever uses a threshold that no part index reaches in practice,
// so every part stays in lower case.
// NOTE(review): math.MaxInt64 is not representable in an int-based type on
// platforms where int is 32 bits wide, so this declaration may fail to compile
// there — confirm the supported targets.
CaseStrategyNever CaseStrategy = math.MaxInt64

// NoSeparator tells SplitJoin not to write any rune between parts.
NoSeparator rune = 0
)

// ToPascalCase transforms a string in any form to PascalCase.
func ToPascalCase(input string) string {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think something should say that those functions respect the Go initialisms pattern.
Either the package name, the method name, or at the very least the doc

Copy link
Contributor Author

@lavoiesl lavoiesl Mar 31, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point.

I could see someone wanting or not wanting the Go initialisms.

How about:

  • ToPascalCase
  • ToPascalGoCase
  • ToCamelCase
  • ToCamelGoCase
  • ToSnakeCase

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

return SplitJoin(input, CaseStrategyTitle, NoSeparator, false)
}

// ToPascalGoCase converts input, whatever its current form, to PascalCase and
// writes recognized initialisms fully in uppercase, as Go style does
// (for example "aa_id" becomes "AaID").
func ToPascalGoCase(input string) string {
	const goInitialisms = true
	return SplitJoin(input, CaseStrategyTitle, NoSeparator, goInitialisms)
}

// ToCamelCase converts input, whatever its current form, to camelCase:
// the first part stays in lower case and every following part is capitalized
// (for example "aa_bbb" becomes "aaBbb").
func ToCamelCase(input string) string {
	const goInitialisms = false
	return SplitJoin(input, CaseStrategySkipFirstPart, NoSeparator, goInitialisms)
}

// ToCamelGoCase converts input, whatever its current form, to camelCase and
// writes recognized initialisms fully in uppercase, as Go style does
// (for example "fooUrl" becomes "fooURL").
func ToCamelGoCase(input string) string {
	const goInitialisms = true
	return SplitJoin(input, CaseStrategySkipFirstPart, NoSeparator, goInitialisms)
}

// ToSnakeCase converts input, whatever its current form, to snake_case:
// every part is lower case and parts are joined with an underscore
// (for example "fooBar" becomes "foo_bar").
func ToSnakeCase(input string) string {
	const underscore = '_'
	return SplitJoin(input, CaseStrategyNever, underscore, false)
}

// ToHeaderField converts input, whatever its current form, to the
// An-HTTP-Header form: parts are capitalized, recognized initialisms are
// written fully in uppercase, and parts are joined with a dash
// (for example "fooUrl" becomes "Foo-URL").
func ToHeaderField(input string) string {
	const dash = '-'
	return SplitJoin(input, CaseStrategyTitle, dash, true)
}

// SplitJoin splits input into parts and joins them back in the requested form.
//
// A part ends at any rune that category does not classify as lower, upper or
// number (such runes are dropped), before an upper-case rune that follows a
// non-upper-case one, and whenever the input switches between letters and
// numbers. Upper-case runes are lowered before a part is rebuilt.
//
// caseStrategy is the zero-based index of the first part handed to pascalPart
// for capitalization. separator is written between parts unless it is
// NoSeparator. initialism is forwarded to pascalPart and controls whether
// recognized initialisms are written fully in uppercase.
func SplitJoin(input string, caseStrategy CaseStrategy, separator rune, initialism bool) string {
	out := allocateBuilder(input, separator)
	titleFrom := int(caseStrategy)

	var (
		// pending holds the lower-cased runes of the part being accumulated.
		pending []rune
		// emitted counts the parts already written to out.
		emitted int
		// prev is the category of the last rune appended to pending.
		prev runeCategory
	)

	// emit writes the pending part, if any, and starts a new one.
	emit := func() {
		if len(pending) == 0 {
			// Nothing accumulated since the previous part was written.
			return
		}
		if emitted > 0 && separator != NoSeparator {
			out.WriteRune(separator)
		}
		if emitted >= titleFrom {
			pascalPart(pending, initialism)
		}
		for i := range pending {
			out.WriteRune(pending[i])
		}
		emitted++
		prev = unknown
		// Empty the buffer while keeping its allocation for the next part.
		pending = pending[:0]
	}

	for _, r := range input {
		cat := category(r)

		var boundary bool
		switch cat {
		case upper:
			// An upper-case rune opens a new part unless it extends an upper-case run.
			boundary = prev != upper
			r = unicode.ToLower(r)
		case lower, number:
			// Split when moving from letters to numbers or the other way around.
			boundary = (prev > number) != (cat > number)
		default:
			// Anything else acts as a separator and is not copied to the output.
			emit()
			continue
		}

		if boundary {
			emit()
		}
		prev = cat
		pending = append(pending, r)
	}
	emit()

	return out.String()
}

// allocateBuilder returns a strings.Builder pre-grown for the expected size of
// the converted input. Without a separator the output is sized like the input;
// with one, roughly 25% extra room is reserved for the separators.
func allocateBuilder(input string, separator rune) *strings.Builder {
	capacity := len(input)
	if separator != NoSeparator {
		// Heuristic only: an estimate that is too small just costs a few more
		// allocations while writing. Examples (original chars -> final chars):
		//   foo_bar_baz: 9 -> 11, estimate 9 + 9/4 = 11
		//   foo_id:      5 -> 6,  estimate 5 + 5/4 = 6
		//   a_b_c_d:     4 -> 7,  estimate 4 + 4/4 = 5 (one extra allocation)
		capacity += capacity / 4
	}

	builder := new(strings.Builder)
	builder.Grow(capacity)
	return builder
}

// pascalPart capitalizes part in place. When initialism is true and
// isInitialism reports part as an initialism, every rune is upper-cased;
// otherwise only the first rune is. part must not be empty.
func pascalPart(part []rune, initialism bool) {
	if !initialism || !isInitialism(part) {
		part[0] = unicode.ToUpper(part[0])
		return
	}
	for i := range part {
		part[i] = unicode.ToUpper(part[i])
	}
}

// runeCategory classifies a rune for the purpose of splitting identifiers.
type runeCategory int

// The declaration order matters: callers compare categories with ">" to tell
// letters (lower, upper) apart from number and unknown.
const (
	// unknown is any rune that is not a lower-case letter, an upper-case letter or a number.
	unknown runeCategory = iota
	// number is a rune for which unicode.IsNumber reports true.
	number
	// lower is a rune for which unicode.IsLower reports true.
	lower
	// upper is a rune for which unicode.IsUpper reports true.
	upper
)

// category returns the runeCategory of r, checking lower case first, then
// upper case, then number, and falling back to unknown.
func category(r rune) runeCategory {
	if unicode.IsLower(r) {
		return lower
	}
	if unicode.IsUpper(r) {
		return upper
	}
	if unicode.IsNumber(r) {
		return number
	}
	return unknown
}
241 changes: 241 additions & 0 deletions strcase/id_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
package strcase

import (
"fmt"
"strings"
"testing"

"github.com/stretchr/testify/require"
)

// Benchmark_splitJoin measures ToCamelCase and ToSnakeCase against a plain
// strings.Split + strings.Join baseline. Inputs are made of p parts of l
// letters each, joined with underscores. Recorded results:
//
//	splitjoin_l1_p1 38.1 ns/op 16 B/op 1 allocs/op
//	IDToCamelCase_l1_p1 88.6 ns/op 48 B/op 3 allocs/op
//	IDToSnakeCase_l1_p1 87.7 ns/op 48 B/op 3 allocs/op
//
//	splitjoin_l1_p10 253 ns/op 176 B/op 2 allocs/op
//	IDToCamelCase_l1_p10 421 ns/op 72 B/op 3 allocs/op
//	IDToSnakeCase_l1_p10 269 ns/op 72 B/op 3 allocs/op
//
//	splitjoin_l1_p100 2137 ns/op 1904 B/op 2 allocs/op
//	IDToCamelCase_l1_p100 3503 ns/op 248 B/op 3 allocs/op
//	IDToSnakeCase_l1_p100 1879 ns/op 296 B/op 3 allocs/op
//
//	splitjoin_l10_p1 38.0 ns/op 16 B/op 1 allocs/op
//	IDToCamelCase_l10_p1 247 ns/op 168 B/op 6 allocs/op
//	IDToSnakeCase_l10_p1 248 ns/op 168 B/op 6 allocs/op
//
//	splitjoin_l10_p10 278 ns/op 272 B/op 2 allocs/op
//	IDToCamelCase_l10_p10 1140 ns/op 264 B/op 6 allocs/op
//	IDToSnakeCase_l10_p10 979 ns/op 296 B/op 6 allocs/op
//
//	splitjoin_l10_p100 2267 ns/op 2816 B/op 2 allocs/op
//	IDToCamelCase_l10_p100 9538 ns/op 1304 B/op 6 allocs/op
//	IDToSnakeCase_l10_p100 8147 ns/op 1560 B/op 6 allocs/op
//
//	splitjoin_l100_p1 41.1 ns/op 16 B/op 1 allocs/op
//	IDToCamelCase_l100_p1 1114 ns/op 1160 B/op 9 allocs/op
//	IDToSnakeCase_l100_p1 1104 ns/op 1176 B/op 9 allocs/op
//
//	splitjoin_l100_p10 446 ns/op 1184 B/op 2 allocs/op
//	IDToCamelCase_l100_p10 7692 ns/op 2072 B/op 9 allocs/op
//	IDToSnakeCase_l100_p10 7589 ns/op 2328 B/op 9 allocs/op
//
//	splitjoin_l100_p100 3877 ns/op 12032 B/op 2 allocs/op
//	IDToCamelCase_l100_p100 72671 ns/op 11288 B/op 9 allocs/op
//	IDToSnakeCase_l100_p100 71673 ns/op 14616 B/op 9 allocs/op
func Benchmark_splitJoin(b *testing.B) {
	sizes := []int{1, 10, 100}

	for _, partLen := range sizes {
		part := strings.Repeat("a", partLen)
		tail := "_" + part

		for _, partCount := range sizes {
			input := part + strings.Repeat(tail, partCount-1)
			suffix := fmt.Sprintf("_l%d_p%d", partLen, partCount)

			// Baseline: split on the separator and join all parts back.
			b.Run("splitjoin"+suffix, func(b *testing.B) {
				for n := 0; n < b.N; n++ {
					strings.Join(strings.Split(input, "_"), "")
				}
			})

			b.Run("IDToCamelCase"+suffix, func(b *testing.B) {
				for n := 0; n < b.N; n++ {
					ToCamelCase(input)
				}
			})

			b.Run("IDToSnakeCase"+suffix, func(b *testing.B) {
				for n := 0; n < b.N; n++ {
					ToSnakeCase(input)
				}
			})
		}
	}
}

// Benchmark_category measures category on small samples of runes, grouped by
// kind of rune. Each iteration classifies every rune of the sample once.
// Recorded results:
//
//	lower 5.03 ns/op 0 B/op 0 allocs/op
//	upper 5.81 ns/op 0 B/op 0 allocs/op
//	number 6.59 ns/op 0 B/op 0 allocs/op
//	symbol 6.58 ns/op 0 B/op 0 allocs/op
//	16_bits 153 ns/op 0 B/op 0 allocs/op
//	32_bits 160 ns/op 0 B/op 0 allocs/op
func Benchmark_category(b *testing.B) {
	samples := map[string][]rune{
		"lower":   {'a', 'b'},
		"upper":   {'A', 'B'},
		"number":  {'0', '1'},
		"symbol":  {'_', ' '},
		"16 bits": {'™', '∞', '•', 'Ω'},
		"32 bits": {'𠁂', '𠁄', '𠁔', '𠁑'},
	}

	for label, sample := range samples {
		b.Run(label, func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				for j := range sample {
					category(sample[j])
				}
			}
		})
	}
}

func Test_splitJoin(t *testing.T) {
tests := []struct {
input string
camel string
camelGo string
pascal string
pascalGo string
snake string
header string
}{
{
// everything empty
},
{
input: "a",
pascal: "A",
camel: "a",
snake: "a",
header: "A",
},
{
input: "A",
pascal: "A",
camel: "a",
snake: "a",
header: "A",
},
{
input: "a_a",
pascal: "AA",
camel: "aA",
snake: "a_a",
header: "A-A",
},
{
input: "__a___a_",
pascal: "AA",
camel: "aA",
snake: "a_a",
header: "A-A",
},
{
input: "aa_bbb",
pascal: "AaBbb",
camel: "aaBbb",
snake: "aa_bbb",
header: "Aa-Bbb",
},
{
input: "aa_id",
pascal: "AaId",
pascalGo: "AaID",
camel: "aaId",
camelGo: "aaID",
snake: "aa_id",
header: "Aa-ID",
},
{
input: "fooBar",
pascal: "FooBar",
camel: "fooBar",
snake: "foo_bar",
header: "Foo-Bar",
},
{
input: "FooBAR",
pascal: "FooBar",
camel: "fooBar",
snake: "foo_bar",
header: "Foo-Bar",
},
{
input: "fooUrl",
pascal: "FooUrl",
pascalGo: "FooURL",
camel: "fooUrl",
camelGo: "fooURL",
snake: "foo_url",
header: "Foo-URL",
},
{
input: "fooURL",
pascal: "FooUrl",
pascalGo: "FooURL",
camel: "fooUrl",
camelGo: "fooURL",
snake: "foo_url",
header: "Foo-URL",
},
{
input: "url10",
pascal: "Url10",
pascalGo: "URL10",
camel: "url10",
snake: "url_10",
header: "URL-10",
},
{
input: "url_id",
pascal: "UrlId",
pascalGo: "URLID",
camel: "urlId",
camelGo: "urlID",
snake: "url_id",
header: "URL-ID",
},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
t.Run("ToPascalCase", func(t *testing.T) {
require.Equal(t, tt.pascal, ToPascalCase(tt.input))
})

t.Run("ToCamelCase", func(t *testing.T) {
require.Equal(t, tt.camel, ToCamelCase(tt.input))
})

t.Run("ToSnakeCase", func(t *testing.T) {
require.Equal(t, tt.snake, ToSnakeCase(tt.input))
})

t.Run("ToPascalGoCase", func(t *testing.T) {
if tt.pascalGo == "" {
tt.pascalGo = tt.pascal
}
require.Equal(t, tt.pascalGo, ToPascalGoCase(tt.input))
})

t.Run("ToCamelGoCase", func(t *testing.T) {
if tt.camelGo == "" {
tt.camelGo = tt.camel
}
require.Equal(t, tt.camelGo, ToCamelGoCase(tt.input))
})

t.Run("ToHeaderField", func(t *testing.T) {
require.Equal(t, tt.header, ToHeaderField(tt.input))
})
})
}
}