Skip to content

Commit

Permalink
Merge 72bccef into 48b03dc
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugo-ter-Doest committed Mar 21, 2024
2 parents 48b03dc + 72bccef commit 50ee263
Show file tree
Hide file tree
Showing 49 changed files with 5,044 additions and 1,268 deletions.
3 changes: 2 additions & 1 deletion .env
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
NODE_PATH=.
# Storage backend you want to use
NATURAL_STORAGE=PG
# Settings for Postgres
Expand All @@ -20,4 +21,4 @@ MONGO_PORT=27017
MONGO_DATABASE=naturaldb
MONGO_OBJECTMODEL=NaturalObjectModel
# Path to the folder where files are written
FS_PATH=.
FS_PATH=./io_spec/tmp
13 changes: 13 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"env": {
"browser": true,
"commonjs": true,
"es2021": true
},
"extends": "standard-with-typescript",
"parserOptions": {
"ecmaVersion": "latest"
},
"rules": {
}
}
2 changes: 1 addition & 1 deletion .github/workflows/node.js.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
- name: Clean install
run: npm ci

- name: Run tests
- name: Run Nodejs tests
run: npm run test

- name: Run IO tests
Expand Down
1 change: 0 additions & 1 deletion example.txt

This file was deleted.

10 changes: 5 additions & 5 deletions io_spec/MaxEntClassifier_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ THE SOFTWARE.
const fs = require('fs')
const _ = require('underscore')

const Sample = require('lib/natural/classifiers/maxent/Sample')
const Element = require('lib/natural/classifiers/maxent/SimpleExample/SE_Element')
const Context = require('lib/natural/classifiers/maxent/Context')
const FeatureSet = require('lib/natural/classifiers/maxent/FeatureSet')
const Classifier = require('lib/natural/classifiers/maxent/Classifier')
const Sample = require('../lib/natural/classifiers/maxent/Sample')
const Element = require('../lib/natural/classifiers/maxent/SimpleExample/SE_Element')
const Context = require('../lib/natural/classifiers/maxent/Context')
const FeatureSet = require('../lib/natural/classifiers/maxent/FeatureSet')
const Classifier = require('../lib/natural/classifiers/maxent/Classifier')

const classifierFile = 'io_spec/test_data/classifier.json'
const nrIterations = 50
Expand Down
8 changes: 4 additions & 4 deletions io_spec/Sample_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ THE SOFTWARE.

const fs = require('fs')

const Sample = require('lib/natural/classifiers/maxent/Sample')
const Element = require('lib/natural/classifiers/maxent/Element')
const Context = require('lib/natural/classifiers/maxent/Context')
const Sample = require('../lib/natural/classifiers/maxent/Sample')
const Element = require('../lib/natural/classifiers/maxent/Element')
const Context = require('../lib/natural/classifiers/maxent/Context')

const DEBUG = false
const sampleFile = 'io_spec/test_data/sample.json'
const sampleFile = './io_spec/test_data/sample.json'

describe('Sample class', function () {
// Create sample
Expand Down
2 changes: 1 addition & 1 deletion io_spec/bayes_classifier_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ describe('Bayes classifier file I/O', function () {
sandbox.restore()
})
it('should pass an error to the callback function', function () {
sandbox.stub(baseClassifier, 'load', function (filename, cb) {
sandbox.stub(baseClassifier, 'load', function (filename, stemmer, cb) {
cb(new Error('An error occurred'))
})
natural.BayesClassifier.load('/spec/test_data/tfidf_document1.txt', null, function (err, newClassifier) {
Expand Down
4 changes: 2 additions & 2 deletions io_spec/classifier_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ const fs = require('fs')

describe('classifier file IO', function () {
describe('save', function () {
const tmpFilename = '/spec/test_data/deleteMe'
const tmpFilename = './test_data/deleteMe'
const nonExistentFilename = '/nonExistentDir/deleteMe'
let classifier

Expand Down Expand Up @@ -62,7 +62,7 @@ describe('classifier file IO', function () {
it('does nothing if called without a callback', function () {
let result
try {
result = Classifier.load('io_spec/test_data/tfidf/tfidf_document1.txt')
result = Classifier.load('./test_data/tfidf/tfidf_document1.txt')
} catch (err) {
console.log(err)
}
Expand Down
2 changes: 1 addition & 1 deletion io_spec/tfidf_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ THE SOFTWARE.

'use strict'

const TfIdf = require('lib/natural/tfidf/tfidf')
const TfIdf = require('../lib/natural/tfidf/tfidf')
let tfidf

describe('tfidf io', function () {
Expand Down
Empty file.
2 changes: 1 addition & 1 deletion io_spec/wordnet_index_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const IndexFile = require('../lib/natural/wordnet/index_file')
describe('indexFile', function () {
describe('getFileSize', function () {
it('should look up a word if the file exists', function () {
const indexFile = new IndexFile('spec/test_data/', 'document1.txt')
const indexFile = new IndexFile('./test_data/', 'document1.txt')
indexFile.lookupFromFile('node', function (indexRecord) {
})
})
Expand Down
2 changes: 1 addition & 1 deletion io_spec/wordnet_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ THE SOFTWARE.

'use strict'

const WordNet = require('lib/natural/wordnet/wordnet')
const WordNet = require('../lib/natural/wordnet/wordnet')

describe('wordnet', function () {
it('should lookup synonyms', function (done) {
Expand Down
16 changes: 7 additions & 9 deletions lib/natural/brill_pos_tagger/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

import { Feature } from '../classifiers'
import type { Feature } from '../classifiers'

declare interface RuleTemplatesItem {
function: (sentence: Sentence, i: number, parameter1?: string, parameter2?: string) => boolean
Expand All @@ -33,9 +33,7 @@ declare interface RuleTemplatesItem {
parameter2Values?: (sentence: Sentence, i: number) => string[]
}

export interface RuleTemplates {
[key: string]: RuleTemplatesItem | undefined
}
export type RuleTemplates = Record<string, RuleTemplatesItem | undefined>

export class RuleTemplate {
constructor (templateName: string, metadata: RuleTemplatesItem)
Expand Down Expand Up @@ -82,7 +80,7 @@ export class RuleSet {

export class Lexicon {
constructor (language: string, defaultCategory: string, defaultCategoryCapitalised?: string)
lexicon: { [key: string]: string[] | undefined }
lexicon: Record<string, string[] | undefined>
defaultCategory: string
defaultCategoryCapitalised: string | undefined
parseLexicon (data: string): void
Expand All @@ -99,8 +97,8 @@ declare class Corpus {
constructor (data: string | Corpus, typeOfCorpus: number, SentenceClass: typeof Sentence)
private readonly wordCount: number
private readonly sentences: Sentence[]
private readonly tagFrequencies: { [key: string]: string[] | undefined }
private readonly posTags: { [key: string]: string[] | undefined }
private readonly tagFrequencies: Record<string, Record<string, number> | undefined>
private readonly posTags: Record<string, boolean | undefined>
parseBrownCorpus (data: string, SentenceClass: typeof Sentence): void
getTags (): string[]
splitInTrainAndTest (percentageTrain: number): [Corpus, Corpus]
Expand Down Expand Up @@ -148,8 +146,8 @@ export class BrillPOSTrainer {
private readonly corpus: Corpus
private readonly templates: RuleTemplates
private readonly positiveRules: RuleSet
private readonly mapRuleToSites: { [key: string]: { [key: number ]: { [key: number ]: boolean | undefined } | undefined } | undefined }
private readonly mapSiteToRules: { [key: number]: { [key: number ]: { [key: string ]: TransformationRule | undefined } | undefined } | undefined }
private readonly mapRuleToSites: Record<string, Record<number, Record< number, boolean | undefined> | undefined> | undefined>
private readonly mapSiteToRules: Record<number, Record<number, Record<string, boolean | undefined> | undefined> | undefined>
private selectHighRule (): TransformationRule
private mapRuleToSite (rule: TransformationRule, i: number, j: number): void
private mapSiteToRule (i: number, j: number, rule: TransformationRule): void
Expand Down
2 changes: 1 addition & 1 deletion lib/natural/classifiers/bayes_classifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class BayesClassifier extends Classifier {
}

static load (filename, stemmer, callback) {
Classifier.load(filename, function (err, classifier) {
Classifier.load(filename, stemmer, function (err, classifier) {
if (err) {
return callback(err)
} else {
Expand Down
43 changes: 10 additions & 33 deletions lib/natural/classifiers/classifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ THE SOFTWARE.
*/

'use strict'
const fs = require('fs')

const EventEmitter = require('events')
const PorterStemmer = require('../stemmers/porter_stemmer')
Expand All @@ -34,6 +35,12 @@ class Classifier extends EventEmitter {
this.features = {}
this.stemmer = stemmer || PorterStemmer
this.lastAdded = 0

// Add methods for parallel training
this.Threads = parallelTrainer.Threads
this.trainParallel = parallelTrainer.trainParallel
this.retrainParallel = parallelTrainer.retrainParallel
this.trainParallelBatches = parallelTrainer.trainParallelBatches
}

addDocument (text, classification) {
Expand Down Expand Up @@ -114,8 +121,8 @@ class Classifier extends EventEmitter {
this.emit('trainedWithDocument', { index: i, total: totalDocs, doc: this.docs[i] })
this.lastAdded++
}
this.emit('doneTraining', true)
this.classifier.train()
this.emit('doneTraining', true)
}

retrain () {
Expand All @@ -139,7 +146,6 @@ class Classifier extends EventEmitter {

save (filename, callback) {
const data = JSON.stringify(this)
const fs = require('fs')
const classifier = this
fs.writeFile(filename, data, 'utf8', function (err) {
if (callback) {
Expand All @@ -148,9 +154,7 @@ class Classifier extends EventEmitter {
})
}

static load (filename, callback) {
const fs = require('fs')

static load (filename, stemmer, callback) {
if (!callback) {
return
}
Expand All @@ -159,7 +163,7 @@ class Classifier extends EventEmitter {
callback(err, null)
} else {
const classifier = JSON.parse(data)
callback(err, classifier)
callback(err, Classifier.restore(classifier, stemmer))
}
})
}
Expand All @@ -177,33 +181,6 @@ class Classifier extends EventEmitter {
setOptions (options) {
this.keepStops = !!(options.keepStops)
}

ClassifiertrainParallel (numThreads, callback) {
if (parallelTrainer.Threads) {
return parallelTrainer.trainParallel(numThreads, callback)
} else {
this.emit('No threads available')
return this.train()
}
}

trainParallelBatches (options) {
if (parallelTrainer.Threads) {
return parallelTrainer.trainParallelBatches(options)
} else {
this.emit('No threads available')
return this.train()
}
}

retrainParallel (numThreads, callback) {
if (parallelTrainer.Threads) {
return parallelTrainer.trainParallel(numThreads, callback)
} else {
this.emit('No threads available')
return this.retrain()
}
}
}

module.exports = Classifier
6 changes: 4 additions & 2 deletions lib/natural/classifiers/classifier_train_parallel.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ let Threads = null
try {
Threads = require('webworker-threads')
} catch (e) {
// webworker-threads is optional: rethrow only when the error is something other than the module not being found
if (e.code !== 'MODULE_NOT_FOUND') throw e
// Silently set Threads to null
Threads = null
}

function checkThreadSupport () {
Expand Down Expand Up @@ -266,6 +266,8 @@ function trainParallelBatches (options) {
}

function retrainParallel (numThreads, callback) {
checkThreadSupport()

this.classifier = new (this.classifier.constructor)()
this.lastAdded = 0
this.trainParallel(numThreads, callback)
Expand Down

0 comments on commit 50ee263

Please sign in to comment.