Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions circularbuffer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package bag

// newCircularBuffer builds an empty circularBuffer for values of type T.
// The backing slice is allocated once with exactly capacity elements (each the
// zero value of T) and the same number is recorded as the buffer's cap. The
// start, end and len fields are left at zero.
func newCircularBuffer[T any](capacity int) *circularBuffer[T] {
	return &circularBuffer[T]{
		cap: capacity,
		s:   make([]T, capacity),
	}
}

// circularBuffer is a fixed-capacity ring of values of type T. Values are
// written with Shift and read back, starting at the start index, with ForEach.
type circularBuffer[T any] struct {
	// start is the index in s where ForEach begins reading; end is the index
	// in s that the next Shift will write to.
	start, end int

	// len is the number of values currently stored (it grows with each Shift
	// until it reaches cap); cap is the capacity given at construction.
	len, cap int

	// s is the backing storage, allocated by newCircularBuffer.
	s []T
}

// Shift stores item in the slot at the current write position and returns the
// value that slot held before. While the buffer is still filling, len grows by
// one per call; once len has reached cap, every call instead advances start so
// that it keeps pointing at the oldest remaining value. Both cursors wrap back
// to zero when they reach cap.
func (c *circularBuffer[T]) Shift(item T) (popped T) {
	slot := c.end
	// Swap the incoming item with whatever currently occupies the slot.
	popped, c.s[slot] = c.s[slot], item

	// Advance the write cursor, wrapping at capacity.
	if c.end = slot + 1; c.end >= c.cap {
		c.end = 0
	}

	if c.len < c.cap {
		// Still filling: nothing was overwritten, so start stays put.
		c.len++
		return popped
	}

	// Buffer was already full: the oldest value was just overwritten, so the
	// read cursor moves on to the next-oldest one.
	if c.start++; c.start >= c.cap {
		c.start = 0
	}

	return popped
}

// ForEach calls fn once for each stored value, beginning at the start index
// and visiting len values in total. Iteration stops early as soon as fn
// returns true, in which case ForEach itself returns true; otherwise it
// returns false after the last value.
func (c *circularBuffer[T]) ForEach(fn func(t T) (end bool)) (ended bool) {
	index := c.start
	for i := 0; i < c.len; i++ {
		if fn(c.s[index]) {
			return true
		}

		// Wrap at the capacity of the ring, the same bound Shift uses for its
		// cursors. Wrapping at len would jump back to slot 0 too early
		// whenever the ring holds fewer than cap values and start is not 0.
		if index++; index >= c.cap {
			index = 0
		}
	}

	return false
}

// Len returns the number of values currently stored in the buffer, as tracked
// by the len field (which Shift increments until it reaches cap).
func (c *circularBuffer[T]) Len() int {
return c.len
}
237 changes: 237 additions & 0 deletions circularbuffer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
package bag

import (
"reflect"
"testing"
)

// Test_circularBuffer_Shift pushes a sequence of values through a buffer of
// the given size and checks two things: the value returned by every Shift
// call, and the final contents of the backing slice.
func Test_circularBuffer_Shift(t *testing.T) {
	cases := []struct {
		name   string
		size   int
		values []int

		wantPopped []int
		wantSlice  []int
	}{
		{
			// Filling an empty buffer only ever pops zero values.
			name:       "basic",
			size:       3,
			values:     []int{1, 2, 3},
			wantPopped: []int{0, 0, 0},
			wantSlice:  []int{1, 2, 3},
		},
		{
			// A second pass over the ring pops the first pass in order.
			name:       "with popped",
			size:       3,
			values:     []int{1, 2, 3, 4, 5, 6},
			wantPopped: []int{0, 0, 0, 1, 2, 3},
			wantSlice:  []int{4, 5, 6},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			buf := newCircularBuffer[int](tc.size)
			for idx, value := range tc.values {
				got := buf.Shift(value)
				want := tc.wantPopped[idx]
				if got != want {
					t.Fatalf("invalid value, wanted <%d> and received <%d>", want, got)
				}
			}

			if reflect.DeepEqual(buf.s, tc.wantSlice) {
				return
			}

			t.Fatalf("invalid slice, wanted <%+v> and received <%+v>", tc.wantSlice, buf.s)
		})
	}
}

// Test_circularBuffer_ForEach fills a buffer of the given size with values and
// then checks that ForEach visits exactly the expected values, in order, and
// reports the expected break result. When hasBreak is set the callback returns
// true on every call, so only the first expected value should be visited.
func Test_circularBuffer_ForEach(t *testing.T) {
	type fields struct {
		size int
	}

	type args struct {
		values   []int
		hasBreak bool
	}

	type testcase struct {
		name   string
		fields fields
		args   args

		want      []int
		wantBreak bool
	}

	tests := []testcase{
		{
			name: "basic",
			fields: fields{
				size: 3,
			},
			args: args{
				values: []int{1, 2, 3},
			},
			want: []int{1, 2, 3},
		},
		{
			name: "not full",
			fields: fields{
				size: 3,
			},
			args: args{
				values: []int{1, 2},
			},
			want: []int{1, 2},
		},
		{
			name: "with partial popped",
			fields: fields{
				size: 3,
			},
			args: args{
				values: []int{1, 2, 3, 4, 5},
			},
			want: []int{3, 4, 5},
		},
		{
			name: "with complete popped",
			fields: fields{
				size: 3,
			},
			args: args{
				values: []int{1, 2, 3, 4, 5, 6},
			},
			want: []int{4, 5, 6},
		},
		{
			name: "with has break",
			fields: fields{
				size: 3,
			},
			args: args{
				values:   []int{1, 2, 3, 4, 5, 6},
				hasBreak: true,
			},
			want:      []int{4, 5, 6},
			wantBreak: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			b := newCircularBuffer[int](tt.fields.size)
			for _, arg := range tt.args.values {
				b.Shift(arg)
			}

			var i int
			gotBreak := b.ForEach(func(val int) (end bool) {
				// Fail cleanly, rather than panic on the index below, if
				// ForEach yields more values than the case expects.
				if i >= len(tt.want) {
					t.Fatalf("unexpected extra iteration value %d after %d expected values", val, len(tt.want))
				}

				if val != tt.want[i] {
					t.Fatalf("invalid iteration value, expected %d and received %d", tt.want[i], val)
				}
				i++

				return tt.args.hasBreak
			})

			if gotBreak != tt.wantBreak {
				t.Fatalf("invalid break value, expected %v and received %v", tt.wantBreak, gotBreak)
			}

			// Without this check, a ForEach that stops short (or never calls
			// the callback at all) would pass unnoticed.
			wantCalls := len(tt.want)
			if tt.args.hasBreak && wantCalls > 1 {
				wantCalls = 1
			}

			if i != wantCalls {
				t.Fatalf("invalid iteration count, expected %d and received %d", wantCalls, i)
			}
		})
	}
}

// Test_circularBuffer_Len fills a buffer of the given size with values and
// checks the count reported by Len: it should match the number of values
// pushed until the buffer is full, and stay at the buffer size afterwards.
func Test_circularBuffer_Len(t *testing.T) {
	type fields struct {
		size int
	}

	type args struct {
		values []int
	}

	type testcase struct {
		name   string
		fields fields
		args   args

		want int
	}

	tests := []testcase{
		{
			name: "basic",
			fields: fields{
				size: 3,
			},
			args: args{
				values: []int{1, 2, 3},
			},
			want: 3,
		},
		{
			name: "partial",
			fields: fields{
				size: 3,
			},
			args: args{
				values: []int{1, 2},
			},
			want: 2,
		},
		{
			name: "empty",
			fields: fields{
				size: 3,
			},
			args: args{
				values: []int{},
			},
			want: 0,
		},
		{
			// Pushing more values than the size must not grow Len past it.
			name: "with popped",
			fields: fields{
				size: 3,
			},
			args: args{
				values: []int{1, 2, 3, 4, 5, 6},
			},
			want: 3,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			b := newCircularBuffer[int](tt.fields.size)
			for _, arg := range tt.args.values {
				b.Shift(arg)
			}

			if got := b.Len(); got != tt.want {
				t.Fatalf("invalid length, expected %d and received %d", tt.want, got)
			}
		})
	}
}
56 changes: 15 additions & 41 deletions ngram.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
package bag

import "bytes"
import (
"bytes"
)

// toNGrams will convert inbound data to an nGram of provided size
func toNGrams(in string, size int) (ns []string) {
// Initialize nGram with a provided size
n := make(nGram, size)
var n nGram
n.circularBuffer = newCircularBuffer[string](size)
// Iterate inbound data as words
toWords(in, func(word string) {
// Append word to nGram
n = n.Append(word)
n.Shift(word)
if !n.IsFull() {
// NGram is not full - we do not want to append yet, return
return
Expand All @@ -30,37 +32,24 @@ func toNGrams(in string, size int) (ns []string) {
}

// nGram represents an N-Gram (variable sized)
type nGram []string

// Append will append a given string to an nGram and output the new value
// Note: The original nGram is NOT modified
func (n nGram) Append(str string) (out nGram) {
// Initialize new nGram with the same size as the original nGram
out = make(nGram, len(n))
// Iterate through original nGram, starting at index 1
for i := 1; i < len(n); i++ {
// Set the value of the current original nGram index as the value for the previous index for the output nGram
out[i-1] = n[i]
}

// Set the last value of the output nGram as the input string
out[len(n)-1] = str
return
type nGram struct {
*circularBuffer[string]
}

// String will convert the nGram contents to a string
func (n nGram) String() (out string) {
func (n *nGram) String() (out string) {
// Initialize buffer
buf := bytes.NewBuffer(nil)
// Iterate through nGram values
n.iterate(func(value string) {
n.ForEach(func(value string) (end bool) {
if buf.Len() > 0 {
// Buffer is not empty, prefix the iterating value with a space
buf.WriteByte(' ')
}

// Write value to buffer
buf.WriteString(value)
return
})

// Return buffer as string
Expand All @@ -69,27 +58,12 @@ func (n nGram) String() (out string) {

// IsZero returns whether or not the nGram is empty
func (n nGram) IsZero() bool {
// Return result of if the value in the last position is empty
return len(n[len(n)-1]) == 0
// Return whether the value in the first position is empty
return len(n.s[0]) == 0
}

// IsFull returns whether or not the nGram is full
func (n nGram) IsFull() bool {
// Return result of if the value in the first position is populated
return len(n[0]) > 0
}

// iterate will iterate through the nGram values
func (n nGram) iterate(fn func(word string)) {
// Iterate through nGram values
for _, str := range n {
// Check if value is empty
if len(str) == 0 {
// Value is empty, continue
continue
}

// Value is populated, pass to provided func
fn(str)
}
// Return whether the value in the last position is populated
return len(n.s[len(n.s)-1]) > 0
}
Loading