Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions bag.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,6 @@ func (b *Bag) GetResults(in string) (r Results) {

return
}
// toNGrams turns the inbound string into n-grams of the configured size.
// Word-based n-grams are produced when the configured NGramType is "word";
// any other value falls through to character-based n-grams.
func (b *Bag) toNGrams(in string) (ns []string) {
	size := b.c.NGramSize
	if b.c.NGramType != "word" {
		// Anything other than "word" is handled as character n-grams
		return tocharacterNGrams(in, size)
	}

	return toNGrams(in, size)
}

func (b *Bag) Train(in, label string) {
// Convert inbound data to a slice of NGrams
Expand All @@ -73,16 +66,23 @@ func (b *Bag) Train(in, label string) {
v[n]++
}

// Increment count of trained documents for the provided label
b.countByLabel[label]++
// Increment total count of trained documents
b.totalCount++
// Increment model counters
b.incrementCounts(label)
}

// toNGrams splits the inbound string into n-grams using the Bag's configuration:
// the "word" NGramType selects word n-grams, every other value selects character
// n-grams. The configured NGramSize is used as the n-gram size in both cases.
func (b *Bag) toNGrams(in string) (ns []string) {
	switch b.c.NGramType {
	case "word":
		ns = toNGrams(in, b.c.NGramSize)
	default:
		ns = tocharacterNGrams(in, b.c.NGramSize)
	}

	return ns
}

// getProbability uses a Naive Bayes classifier to determine probability for a given label
func (b *Bag) getProbability(ns []string, label string, vocab Vocabulary) (probability float64) {
// Set initial probability value as the prior probability value
probability = b.getPriorProbability(label)
probability = b.getLogPriorProbability(label)
// Get the current counts by label (to be used by Laplace smoothing during for-loop)
countsByLabel := float64(b.countByLabel[label]) + b.c.SmoothingParameter*float64(len(vocab))

Expand All @@ -98,7 +98,7 @@ func (b *Bag) getProbability(ns []string, label string, vocab Vocabulary) (proba
return
}

func (b *Bag) getPriorProbability(label string) (probability float64) {
func (b *Bag) getLogPriorProbability(label string) (probability float64) {
count := float64(b.countByLabel[label])
total := float64(b.totalCount)
// Get the logarithmic value of count divided by total count
Expand All @@ -118,3 +118,10 @@ func (b *Bag) getOrCreateVocabulary(label string) (v Vocabulary) {

return
}

// incrementCounts bumps the model counters for a single trained document:
// the per-label document count for the provided label, followed by the
// total document count.
func (b *Bag) incrementCounts(label string) {
	// Read the current per-label count and store it back incremented by one
	labelCount := b.countByLabel[label]
	b.countByLabel[label] = labelCount + 1
	// The total is updated last, after the per-label write has succeeded
	b.totalCount++
}
56 changes: 28 additions & 28 deletions ngram.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,57 @@ package bag

import "bytes"

// toNGrams will convert inbound data to an NGram of provided size
// toNGrams will convert inbound data to an nGram of provided size
func toNGrams(in string, size int) (ns []string) {
// Initialize NGram with a provided size
n := make(NGram, size)
// Initialize nGram with a provided size
n := make(nGram, size)
// Iterate inbound data as words
toWords(in, func(word string) {
// Append word to NGram
// Append word to nGram
n = n.Append(word)
if !n.IsFull() {
// NGram is not full - we do not want to append yet, return
return
}

// Append current NGram to NGrams slice
// Append current nGram to nGrams slice
ns = append(ns, n.String())
})

if !n.IsFull() && !n.IsZero() {
// The NGram is not full, so we haven't appended yet
// The NGram is not empty, so we have something to append
// Append current NGram to NGrams slice
// The nGram is not full, so we haven't appended yet
// The nGram is not empty, so we have something to append
// Append current nGram to nGrams slice
ns = append(ns, n.String())
}

return
}

// NGram represents an NGram (variable sized)
type NGram []string
// nGram represents an N-Gram (variable sized)
type nGram []string

// Append will append a given string to an NGram and output the new value
// Note: The original NGram is NOT modified
func (n NGram) Append(str string) (out NGram) {
// Initialize new NGram with the same size as the original NGram
out = make(NGram, len(n))
// Iterate through original NGram, starting at index 1
// Append will append a given string to an nGram and output the new value
// Note: The original nGram is NOT modified
func (n nGram) Append(str string) (out nGram) {
// Initialize new nGram with the same size as the original nGram
out = make(nGram, len(n))
// Iterate through original nGram, starting at index 1
for i := 1; i < len(n); i++ {
// Set the value of the current original NGram index as the value for the previous index for the output NGram
// Set the value of the current original nGram index as the value for the previous index for the output nGram
out[i-1] = n[i]
}

// Set the last value of the output NGram as the input string
// Set the last value of the output nGram as the input string
out[len(n)-1] = str
return
}

// String will convert the NGram contents to a string
func (n NGram) String() (out string) {
// String will convert the nGram contents to a string
func (n nGram) String() (out string) {
// Initialize buffer
buf := bytes.NewBuffer(nil)
// Iterate through NGram values
// Iterate through nGram values
n.iterate(func(value string) {
if buf.Len() > 0 {
// Buffer is not empty, prefix the iterating value with a space
Expand All @@ -67,21 +67,21 @@ func (n NGram) String() (out string) {
return buf.String()
}

// IsZero returns whether or not the NGram is empty
func (n NGram) IsZero() bool {
// IsZero returns whether or not the nGram is empty
func (n nGram) IsZero() bool {
// Return result of if the value in the last position is empty
return len(n[len(n)-1]) == 0
}

// IsFull returns whether or not the NGram is full
func (n NGram) IsFull() bool {
// IsFull returns whether or not the nGram is full
func (n nGram) IsFull() bool {
// Return result of if the value in the first position is populated
return len(n[0]) > 0
}

// iterate will iterate through the NGram values
func (n NGram) iterate(fn func(word string)) {
// Iterate through NGram values
// iterate will iterate through the nGram values
func (n nGram) iterate(fn func(word string)) {
// Iterate through nGram values
for _, str := range n {
// Check if value is empty
if len(str) == 0 {
Expand Down
6 changes: 0 additions & 6 deletions sample.go

This file was deleted.

3 changes: 3 additions & 0 deletions samples.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package bag

// Samples is a list of sample strings.
type Samples []string
3 changes: 3 additions & 0 deletions samplesbylabel.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package bag

// SamplesByLabel maps a string key to the Samples stored under it.
// NOTE(review): going by the type name, the key is presumably the
// classification label - confirm against callers.
type SamplesByLabel map[string]Samples
4 changes: 0 additions & 4 deletions trainingset.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,3 @@ type TrainingSet struct {

Samples SamplesByLabel `yaml:"samples"`
}

// SamplesByLabel maps a string key to the Samples stored under it.
// NOTE(review): going by the type name, the key is presumably the
// classification label - confirm against callers.
type SamplesByLabel map[string]Samples

// Samples is a list of sample strings.
type Samples []string