From 0f1334494bf1fc740b4d48c7666136f508d8eb7a Mon Sep 17 00:00:00 2001 From: Josh Montoya Date: Mon, 15 Jul 2024 21:36:36 -0700 Subject: [PATCH 1/7] implement circular buff --- circularbuffer.go | 59 ++++++++++ circularbuffer_test.go | 237 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 circularbuffer.go create mode 100644 circularbuffer_test.go diff --git a/circularbuffer.go b/circularbuffer.go new file mode 100644 index 0000000..4e9d45c --- /dev/null +++ b/circularbuffer.go @@ -0,0 +1,59 @@ +package bag + +func newCircularBuffer[T any](capacity int) *circularBuffer[T] { + var c circularBuffer[T] + c.cap = capacity + c.s = make([]T, capacity) + return &c +} + +type circularBuffer[T any] struct { + start int + end int + + len int + cap int + + s []T +} + +func (c *circularBuffer[T]) Shift(item T) (popped T) { + popped = c.s[c.end] + c.s[c.end] = item + + c.end++ + if c.len < c.cap { + c.len++ + } else { + if c.start++; c.start >= c.cap { + c.start = 0 + } + + } + + if c.end >= c.cap { + c.end = 0 + } + + return +} + +func (c *circularBuffer[T]) ForEach(fn func(t T) (end bool)) (ended bool) { + index := c.start + for i := 0; i < c.len; i++ { + item := c.s[index] + if fn(item) { + return true + } + + if index++; index >= c.len { + index = 0 + } + } + + return +} + +func (c *circularBuffer[T]) Len() int { + return c.len +} diff --git a/circularbuffer_test.go b/circularbuffer_test.go new file mode 100644 index 0000000..c9e263e --- /dev/null +++ b/circularbuffer_test.go @@ -0,0 +1,237 @@ +package bag + +import ( + "reflect" + "testing" +) + +func Test_circularBuffer_Shift(t *testing.T) { + type fields struct { + size int + } + + type args struct { + values []int + } + + type testcase struct { + name string + fields fields + args args + + wantPopped []int + wantSlice []int + } + + tests := []testcase{ + { + name: "basic", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2, 3}, + }, + wantPopped: []int{0, 
0, 0}, + wantSlice: []int{1, 2, 3}, + }, + { + name: "with popped", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2, 3, 4, 5, 6}, + }, + wantPopped: []int{0, 0, 0, 1, 2, 3}, + wantSlice: []int{4, 5, 6}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := newCircularBuffer[int](tt.fields.size) + for i, arg := range tt.args.values { + if got := b.Shift(arg); got != tt.wantPopped[i] { + t.Fatalf("invalid value, wanted <%d> and received <%d>", tt.wantPopped[i], got) + } + } + + if !reflect.DeepEqual(b.s, tt.wantSlice) { + t.Fatalf("invalid slice, wanted <%+v> and received <%+v>", tt.wantSlice, b.s) + } + }) + } +} + +func Test_circularBuffer_ForEach(t *testing.T) { + type fields struct { + size int + } + + type args struct { + values []int + hasBreak bool + } + + type testcase struct { + name string + fields fields + args args + + want []int + wantBreak bool + } + + tests := []testcase{ + { + name: "basic", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2, 3}, + }, + want: []int{1, 2, 3}, + }, + { + name: "not full", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2}, + }, + want: []int{1, 2}, + }, + { + name: "with partial popped", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2, 3, 4, 5}, + }, + want: []int{3, 4, 5}, + }, + { + name: "with complete popped", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2, 3, 4, 5, 6}, + }, + want: []int{4, 5, 6}, + }, + { + name: "with has break", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2, 3, 4, 5, 6}, + hasBreak: true, + }, + want: []int{4, 5, 6}, + wantBreak: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := newCircularBuffer[int](tt.fields.size) + for _, arg := range tt.args.values { + b.Shift(arg) + } + + var i int + gotBreak := b.ForEach(func(val int) (end bool) { + if val != tt.want[i] { + t.Fatalf("invalid iteration value, 
expected %d and received %d", tt.want[i], val) + } + i++ + + return tt.args.hasBreak + }) + + if gotBreak != tt.wantBreak { + t.Fatalf("invalid break value, expected %v and received %v", tt.wantBreak, gotBreak) + } + }) + } +} + +func Test_circularBuffer_Len(t *testing.T) { + type fields struct { + size int + } + + type args struct { + values []int + } + + type testcase struct { + name string + fields fields + args args + + want int + } + + tests := []testcase{ + { + name: "basic", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2, 3}, + }, + want: 3, + }, + { + name: "partial", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2}, + }, + want: 2, + }, + { + name: "empty", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{}, + }, + want: 0, + }, + { + name: "with popped", + fields: fields{ + size: 3, + }, + args: args{ + values: []int{1, 2, 3, 4, 5, 6}, + }, + want: 3, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := newCircularBuffer[int](tt.fields.size) + for _, arg := range tt.args.values { + b.Shift(arg) + } + + if got := b.Len(); got != tt.want { + t.Fatalf("invalid length, expected %d and recieved %d", tt.want, got) + } + }) + } +} From f9f92c9d700ab791c0daa4c9a7b7291bd251b939 Mon Sep 17 00:00:00 2001 From: Josh Montoya Date: Mon, 15 Jul 2024 22:24:38 -0700 Subject: [PATCH 2/7] add cap func --- circularbuffer.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/circularbuffer.go b/circularbuffer.go index 4e9d45c..a2a63d3 100644 --- a/circularbuffer.go +++ b/circularbuffer.go @@ -57,3 +57,7 @@ func (c *circularBuffer[T]) ForEach(fn func(t T) (end bool)) (ended bool) { func (c *circularBuffer[T]) Len() int { return c.len } + +func (c *circularBuffer[T]) Cap() int { + return c.cap +} From 849726c77cf564bfc9a16a021c8a752db4012c7f Mon Sep 17 00:00:00 2001 From: Josh Montoya Date: Mon, 15 Jul 2024 22:24:51 -0700 Subject: [PATCH 3/7] implement toNGram benchmark --- 
ngram_test.go | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 ngram_test.go diff --git a/ngram_test.go b/ngram_test.go new file mode 100644 index 0000000..c261b6a --- /dev/null +++ b/ngram_test.go @@ -0,0 +1,82 @@ +package bag + +import ( + "reflect" + "testing" +) + +var ngramsSink []string + +func Test_toNGrams(t *testing.T) { + type args struct { + in string + size int + } + + type testcase struct { + name string + args args + wantNs []string + } + + tests := []testcase{ + { + name: "basic", + args: args{ + in: "hello world! This is really cool, wowo", + size: 3, + }, + wantNs: []string{ + "hello world this", + "world this is", + "this is really", + "is really cool", + "really cool wowo", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if gotNs := toNGrams(tt.args.in, tt.args.size); !reflect.DeepEqual(gotNs, tt.wantNs) { + t.Errorf("toNGrams() = %v, want %v", gotNs, tt.wantNs) + } + }) + } +} + +func Benchmark_toNGrams(b *testing.B) { + type args struct { + in string + size int + } + + type testcase struct { + name string + args args + wantNs []string + } + + tests := []testcase{ + { + name: "basic", + args: args{ + in: "hello world! 
This is really cool, wowo", + size: 3, + }, + wantNs: []string{ + "hello world this", + "world this is", + "this is really", + "is really cool", + "really cool wowo", + }, + }, + } + + for i := 0; i < b.N; i++ { + for _, tc := range tests { + ngramsSink = toNGrams(tc.args.in, tc.args.size) + } + } +} From 8f5e03d42113b7c53c857813efd8eda8f5da1598 Mon Sep 17 00:00:00 2001 From: Josh Montoya Date: Mon, 15 Jul 2024 23:36:49 -0700 Subject: [PATCH 4/7] make error output easier to read --- ngram_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ngram_test.go b/ngram_test.go index c261b6a..8d9fc3a 100644 --- a/ngram_test.go +++ b/ngram_test.go @@ -39,7 +39,7 @@ func Test_toNGrams(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if gotNs := toNGrams(tt.args.in, tt.args.size); !reflect.DeepEqual(gotNs, tt.wantNs) { - t.Errorf("toNGrams() = %v, want %v", gotNs, tt.wantNs) + t.Errorf("toNGrams() = \n%v,\n want \n%v", gotNs, tt.wantNs) } }) } From 4e93286b73cb870d34c18ba7cdc596d90e265486 Mon Sep 17 00:00:00 2001 From: Josh Montoya Date: Mon, 15 Jul 2024 23:37:01 -0700 Subject: [PATCH 5/7] use circular buffer --- ngram.go | 56 +++++++++++++++----------------------------------------- 1 file changed, 15 insertions(+), 41 deletions(-) diff --git a/ngram.go b/ngram.go index a3e0dc9..0b1346c 100644 --- a/ngram.go +++ b/ngram.go @@ -1,15 +1,17 @@ package bag -import "bytes" +import ( + "bytes" +) // toNGrams will convert inbound data to an nGram of provided size func toNGrams(in string, size int) (ns []string) { - // Initialize nGram with a provided size - n := make(nGram, size) + var n nGram + n.circularBuffer = newCircularBuffer[string](size) // Iterate inbound data as words toWords(in, func(word string) { // Append word to nGram - n = n.Append(word) + n.Shift(word) if !n.IsFull() { // NGram is not full - we do not want to append yet, return return @@ -30,30 +32,16 @@ func toNGrams(in string, size int) (ns []string) { } // nGram 
represents an N-Gram (variable sized) -type nGram []string - -// Append will append a given string to an nGram and output the new value -// Note: The original nGram is NOT modified -func (n nGram) Append(str string) (out nGram) { - // Initialize new nGram with the same size as the original nGram - out = make(nGram, len(n)) - // Iterate through original nGram, starting at index 1 - for i := 1; i < len(n); i++ { - // Set the value of the current original nGram index as the value for the previous index for the output nGram - out[i-1] = n[i] - } - - // Set the last value of the output nGram as the input string - out[len(n)-1] = str - return +type nGram struct { + *circularBuffer[string] } // String will convert the nGram contents to a string -func (n nGram) String() (out string) { +func (n *nGram) String() (out string) { // Initialize buffer buf := bytes.NewBuffer(nil) // Iterate through nGram values - n.iterate(func(value string) { + n.ForEach(func(value string) (end bool) { if buf.Len() > 0 { // Buffer is not empty, prefix the iterating value with a space buf.WriteByte(' ') @@ -61,6 +49,7 @@ func (n nGram) String() (out string) { // Write value to buffer buf.WriteString(value) + return }) // Return buffer as string @@ -69,27 +58,12 @@ func (n nGram) String() (out string) { // IsZero returns whether or not the nGram is empty func (n nGram) IsZero() bool { - // Return result of if the value in the last position is empty - return len(n[len(n)-1]) == 0 + // Return result of if the value in the first position is empty + return len(n.s[0]) == 0 } // IsFull returns whether or not the nGram is full func (n nGram) IsFull() bool { - // Return result of if the value in the first position is populated - return len(n[0]) > 0 -} - -// iterate will iterate through the nGram values -func (n nGram) iterate(fn func(word string)) { - // Iterate through nGram values - for _, str := range n { - // Check if value is empty - if len(str) == 0 { - // Value is empty, continue - continue - 
} - - // Value is populated, pass to provided func - fn(str) - } + // Return result of if the value in the last position is populated + return len(n.s[len(n.s)-1]) > 0 } From 4fe26f5d1194d74463521feb401ea532e2269c86 Mon Sep 17 00:00:00 2001 From: Josh Montoya Date: Mon, 15 Jul 2024 23:42:21 -0700 Subject: [PATCH 6/7] add circular buffer test --- circularbuffer_test.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/circularbuffer_test.go b/circularbuffer_test.go index c9e263e..ab2d9e2 100644 --- a/circularbuffer_test.go +++ b/circularbuffer_test.go @@ -235,3 +235,29 @@ func Test_circularBuffer_Len(t *testing.T) { }) } } + +func Test_newCircularBuffer(t *testing.T) { + type args struct { + capacity int + } + tests := []struct { + name string + args args + }{ + { + name: "basic", + args: args{ + capacity: 3, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := newCircularBuffer[string](tt.args.capacity) + if got := c.Cap(); got != tt.args.capacity { + t.Errorf("newCircularBuffer().Capacity = %v, want %v", got, tt.args.capacity) + } + }) + } +} From f75fc09097c5921c8a72f9e5881273e2e5846834 Mon Sep 17 00:00:00 2001 From: Josh Montoya Date: Mon, 15 Jul 2024 23:44:34 -0700 Subject: [PATCH 7/7] remove non-used Cap --- circularbuffer.go | 4 ---- circularbuffer_test.go | 26 -------------------------- 2 files changed, 30 deletions(-) diff --git a/circularbuffer.go b/circularbuffer.go index a2a63d3..4e9d45c 100644 --- a/circularbuffer.go +++ b/circularbuffer.go @@ -57,7 +57,3 @@ func (c *circularBuffer[T]) ForEach(fn func(t T) (end bool)) (ended bool) { func (c *circularBuffer[T]) Len() int { return c.len } - -func (c *circularBuffer[T]) Cap() int { - return c.cap -} diff --git a/circularbuffer_test.go b/circularbuffer_test.go index ab2d9e2..c9e263e 100644 --- a/circularbuffer_test.go +++ b/circularbuffer_test.go @@ -235,29 +235,3 @@ func Test_circularBuffer_Len(t *testing.T) { }) } } - -func 
Test_newCircularBuffer(t *testing.T) { - type args struct { - capacity int - } - tests := []struct { - name string - args args - }{ - { - name: "basic", - args: args{ - capacity: 3, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - c := newCircularBuffer[string](tt.args.capacity) - if got := c.Cap(); got != tt.args.capacity { - t.Errorf("newCircularBuffer().Capacity = %v, want %v", got, tt.args.capacity) - } - }) - } -}