Skip to content
This repository

add GitHub Flavored Markdown to README #79

Merged
merged 1 commit into from over 1 year ago

2 participants

Phil Cohen Chris Umbel
Phil Cohen
  • This makes the example code easier to read, especially when perusing the docs on github
Phil Cohen phlipper add GitHub Flavored Markdown to README
* This makes the example code easier to read,
  especially when perusing the docs on github
38254c1
Chris Umbel chrisumbel merged commit 970b5bf into from
Chris Umbel
Owner

Thank you very much for taking the time!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Showing 1 unique commit by 1 author.

Dec 02, 2012
Phil Cohen phlipper add GitHub Flavored Markdown to README
* This makes the example code easier to read,
  especially when perusing the docs on github
38254c1
This page is out of date. Refresh to see the latest.

Showing 1 changed file with 335 additions and 209 deletions. Show diff stats Hide diff stats

  1. +335 209 README.md
544 README.md
Source Rendered
@@ -33,72 +33,92 @@ Tokenizers
33 33 Word, Regexp, and Treebank tokenizers are provided for breaking text up into
34 34 arrays of tokens:
35 35
36   - var natural = require('natural'),
37   - tokenizer = new natural.WordTokenizer();
38   - console.log(tokenizer.tokenize("your dog has flees."));
39   - // [ 'your', 'dog', 'has', 'flees' ]
  36 +```javascript
  37 +var natural = require('natural'),
  38 + tokenizer = new natural.WordTokenizer();
  39 +console.log(tokenizer.tokenize("your dog has flees."));
  40 +// [ 'your', 'dog', 'has', 'flees' ]
  41 +```
40 42
41 43 The other tokenizers follow a similar pattern:
42 44
43   - tokenizer = new natural.TreebankWordTokenizer();
44   - console.log(tokenizer.tokenize("my dog hasn't any flees."));
45   - // [ 'my', 'dog', 'has', 'n\'t', 'any', 'flees', '.' ]
  45 +```javascript
  46 +tokenizer = new natural.TreebankWordTokenizer();
  47 +console.log(tokenizer.tokenize("my dog hasn't any flees."));
  48 +// [ 'my', 'dog', 'has', 'n\'t', 'any', 'flees', '.' ]
46 49
47   - tokenizer = new natural.RegexpTokenizer({pattern: /\-/});
48   - console.log(tokenizer.tokenize("flee-dog"));
49   - // [ 'flee', 'dog' ]
  50 +tokenizer = new natural.RegexpTokenizer({pattern: /\-/});
  51 +console.log(tokenizer.tokenize("flee-dog"));
  52 +// [ 'flee', 'dog' ]
50 53
51   - tokenizer = new natural.WordPunctTokenizer();
52   - console.log(tokenizer.tokenize("my dog hasn't any flees."));
53   - // [ 'my', 'dog', 'hasn', '\'', 't', 'any', 'flees', '.' ]
  54 +tokenizer = new natural.WordPunctTokenizer();
  55 +console.log(tokenizer.tokenize("my dog hasn't any flees."));
  56 +// [ 'my', 'dog', 'hasn', '\'', 't', 'any', 'flees', '.' ]
  57 +```
54 58
55 59 String Distance
56 60 ----------------------
57 61 Natural provides an implementation of the Jaro-Winkler string distance measuring algorithm.
58 62 This will return a number between 0 and 1 which tells how closely the strings match (0 = not at all, 1 = exact match):
59 63
60   - var natural = require('natural');
61   - console.log(natural.JaroWinklerDistance("dixon","dicksonx"))
62   - console.log(natural.JaroWinklerDistance('not', 'same'));
  64 +```javascript
  65 +var natural = require('natural');
  66 +console.log(natural.JaroWinklerDistance("dixon","dicksonx"))
  67 +console.log(natural.JaroWinklerDistance('not', 'same'));
  68 +```
63 69
64 70 Output:
65 71
66   - 0.7466666666666666
67   - 0
  72 +```javascript
  73 +0.7466666666666666
  74 +0
  75 +```
68 76
69 77 Natural also offers support for Levenshtein distances:
70 78
71   - var natural = require('natural');
72   - console.log(natural.LevenshteinDistance("ones","onez"));
73   - console.log(natural.LevenshteinDistance('one', 'one'));
  79 +```javascript
  80 +var natural = require('natural');
  81 +console.log(natural.LevenshteinDistance("ones","onez"));
  82 +console.log(natural.LevenshteinDistance('one', 'one'));
  83 +```
74 84
75 85 Output:
76 86
77   - 2
78   - 0
  87 +```javascript
  88 +2
  89 +0
  90 +```
79 91
80 92 The cost of the three edit operations are modifiable for Levenshtein:
81 93
82   - console.log(natural.LevenshteinDistance("ones","onez", {
83   - insertion_cost: 1,
84   - deletion_cost: 1,
85   - substitution_cost: 1
86   - }));
  94 +```javascript
  95 +console.log(natural.LevenshteinDistance("ones","onez", {
  96 + insertion_cost: 1,
  97 + deletion_cost: 1,
  98 + substitution_cost: 1
  99 +}));
  100 +```
87 101
88 102 Output:
89 103
90   - 1
  104 +```javascript
  105 +1
  106 +```
91 107
92 108 And Dice's co-efficient:
93 109
94   - var natural = require('natural');
95   - console.log(natural.DiceCoefficient('thing', 'thing'));
96   - console.log(natural.DiceCoefficient('not', 'same'));
  110 +```javascript
  111 +var natural = require('natural');
  112 +console.log(natural.DiceCoefficient('thing', 'thing'));
  113 +console.log(natural.DiceCoefficient('not', 'same'));
  114 +```
97 115
98 116 Output:
99 117
100   - 1
101   - 0
  118 +```javascript
  119 +1
  120 +0
  121 +```
102 122
103 123 Stemmers
104 124 --------
@@ -106,33 +126,45 @@ Stemmers
106 126 Currently, stemming is supported via the Porter (English, Russian and Spanish) and Lancaster (Paice/Husk)
107 127 algorithms.
108 128
109   - var natural = require('natural');
  129 +```javascript
  130 +var natural = require('natural');
  131 +```
110 132
111 133 This example uses a Porter stemmer. "word" is returned.
112 134
113   - console.log(natural.PorterStemmer.stem("words")); // stem a single word
  135 +```javascript
  136 +console.log(natural.PorterStemmer.stem("words")); // stem a single word
  137 +```
114 138
115 139 in Russian:
116 140
117   - console.log(natural.PorterStemmerRu.stem("падший"));
  141 +```javascript
  142 +console.log(natural.PorterStemmerRu.stem("падший"));
  143 +```
118 144
119 145 in Spanish:
120 146
121   - console.log(natural.PorterStemmerEs.stem("jugaría"));
  147 +```javascript
  148 +console.log(natural.PorterStemmerEs.stem("jugaría"));
  149 +```
122 150
123 151 `attach()` patches `stem()` and `tokenizeAndStem()` to String as a shortcut to
124 152 `PorterStemmer.stem(token)`. `tokenizeAndStem()` breaks text up into single words
125 153 and returns an array of stemmed tokens.
126 154
127   - natural.PorterStemmer.attach();
128   - console.log("i am waking up to the sounds of chainsaws".tokenizeAndStem());
129   - console.log("chainsaws".stem());
  155 +```javascript
  156 +natural.PorterStemmer.attach();
  157 +console.log("i am waking up to the sounds of chainsaws".tokenizeAndStem());
  158 +console.log("chainsaws".stem());
  159 +```
130 160
131 161 the same thing can be done with a Lancaster stemmer:
132 162
133   - natural.LancasterStemmer.attach();
134   - console.log("i am waking up to the sounds of chainsaws".tokenizeAndStem());
135   - console.log("chainsaws".stem());
  163 +```javascript
  164 +natural.LancasterStemmer.attach();
  165 +console.log("i am waking up to the sounds of chainsaws".tokenizeAndStem());
  166 +console.log("chainsaws".stem());
  167 +```
136 168
137 169 Classifiers
138 170 ----------------------
@@ -141,69 +173,88 @@ Two classifiers are currently supported, Naive Bayes and logistic regression.
141 173 The following examples use the BayesClassifier class, but the
142 174 LogisticRegressionClassifier class could be substituted instead.
143 175
144   - var natural = require('natural'),
145   - classifier = new natural.BayesClassifier();
  176 +```javascript
  177 +var natural = require('natural'),
  178 + classifier = new natural.BayesClassifier();
  179 +```
146 180
147 181 You can train the classifier on sample text. It will use reasonable defaults to
148 182 tokenize and stem the text.
149 183
150   - classifier.addDocument('i am long qqqq', 'buy');
151   - classifier.addDocument('buy the q''s', 'buy');
152   - classifier.addDocument('short gold', 'sell');
153   - classifier.addDocument('sell gold', 'sell');
  184 +```javascript
  185 +classifier.addDocument('i am long qqqq', 'buy');
  186 +classifier.addDocument('buy the q\'s', 'buy');
  187 +classifier.addDocument('short gold', 'sell');
  188 +classifier.addDocument('sell gold', 'sell');
154 189
155   - classifier.train();
  190 +classifier.train();
  191 +```
156 192
157 193 Outputs "sell"
158 194
159   - console.log(classifier.classify('i am short silver'));
  195 +```javascript
  196 +console.log(classifier.classify('i am short silver'));
  197 +```
160 198
161 199 Outputs "buy"
162 200
163   - console.log(classifier.classify('i am long copper'));
  201 +```javascript
  202 +console.log(classifier.classify('i am long copper'));
  203 +```
164 204
165 205 You have access to the set of matched classes and the associated value from the classifier.
166 206
167 207 Outputs:
168 208
169   - [ { label: 'sell', value: 0.39999999999999997 },
170   - { label: 'buy', value: 0.19999999999999998 } ]
  209 +```javascript
  210 +[ { label: 'sell', value: 0.39999999999999997 },
  211 + { label: 'buy', value: 0.19999999999999998 } ]
  212 +```
171 213
172 214 From this:
173 215
174   - console.log(classifier.getClassifications('i am long copper'));
175   -
  216 +```javascript
  217 +console.log(classifier.getClassifications('i am long copper'));
  218 +```
176 219
177 220 The classifier can also be trained with and can classify arrays of tokens, strings, or
178 221 any mixture of the two. Arrays let you use entirely custom data with your own
179 222 tokenization/stemming, if you choose to implement it.
180 223
181   - classifier.addDocument(['sell', 'gold'], 'sell');
  224 +```javascript
  225 +classifier.addDocument(['sell', 'gold'], 'sell');
  226 +```
182 227
183 228 A classifier can also be persisted and recalled later so that you can reuse it later.
184 229
185   - classifier.save('classifier.json', function(err, classifier) {
186   - // the classifier is saved to the classifier.json file!
187   - });
  230 +```javascript
  231 +classifier.save('classifier.json', function(err, classifier) {
  232 + // the classifier is saved to the classifier.json file!
  233 +});
  234 +```
188 235
189 236 To recall from the classifier.json saved above:
190 237
191   - natural.BayesClassifier.load('classifier.json', null, function(err, classifier) {
192   - console.log(classifier.classify('long SUNW'));
193   - console.log(classifier.classify('short SUNW'));
194   - });
  238 +```javascript
  239 +natural.BayesClassifier.load('classifier.json', null, function(err, classifier) {
  240 + console.log(classifier.classify('long SUNW'));
  241 + console.log(classifier.classify('short SUNW'));
  242 +});
  243 +```
195 244
196 245 A classifier can also be serialized and deserialized like so:
197 246
198   - var classifier = new natural.BayesClassifier();
199   - classifier.addDocument(['sell', 'gold'], 'sell');
200   - classifier.addDocument(['buy', 'silver'], 'buy');
  247 +```javascript
  248 +var classifier = new natural.BayesClassifier();
  249 +classifier.addDocument(['sell', 'gold'], 'sell');
  250 +classifier.addDocument(['buy', 'silver'], 'buy');
201 251
202   - // serialize
203   - var raw = JSON.stringify(classifier);
204   - // deserialize
205   - var restoredClassifier = natural.BayesClassifier.restore(JSON.parse(raw));
206   - console.log(restoredClassifier.classify('i should sell that'));
  252 +// serialize
  253 +var raw = JSON.stringify(classifier);
  254 +// deserialize
  255 +var restoredClassifier = natural.BayesClassifier.restore(JSON.parse(raw));
  256 +console.log(restoredClassifier.classify('i should sell that'));
  257 +```
207 258
208 259 Phonetics
209 260 ---------
@@ -211,66 +262,87 @@ Phonetics
211 262 Phonetic (sounds-like) matching can be done with the SoundEx,
212 263 Metaphone, or DoubleMetaphone algorithms:
213 264
214   - var natural = require('natural'),
215   - metaphone = natural.Metaphone, soundEx = natural.SoundEx;
  265 +```javascript
  266 +var natural = require('natural'),
  267 + metaphone = natural.Metaphone, soundEx = natural.SoundEx;
216 268
217   - var wordA = 'phonetics';
218   - var wordB = 'fonetix';
  269 +var wordA = 'phonetics';
  270 +var wordB = 'fonetix';
  271 +```
219 272
220 273 To test the two words to see if they sound alike:
221 274
222   - if(metaphone.compare(wordA, wordB))
223   - console.log('they sound alike!');
  275 +```javascript
  276 +if(metaphone.compare(wordA, wordB))
  277 + console.log('they sound alike!');
  278 +```
224 279
225 280 The raw phonetics are obtained with `process()`:
226 281
227   - console.log(metaphone.process('phonetics'));
  282 +```javascript
  283 +console.log(metaphone.process('phonetics'));
  284 +```
228 285
229 286 A maximum code length can be supplied:
230 287
231   - console.log(metaphone.process('phonetics', 3));
  288 +```javascript
  289 +console.log(metaphone.process('phonetics', 3));
  290 +```
232 291
233 292 `DoubleMetaphone` deals with two encodings returned in an array. This
234 293 feature is experimental and subject to change:
235 294
236   - var natural = require('natural'),
237   - dm = natural.DoubleMetaphone;
  295 +```javascript
  296 +var natural = require('natural'),
  297 + dm = natural.DoubleMetaphone;
238 298
239   - var encodings = dm.process('Matrix');
240   - console.log(encodings[0]);
241   - console.log(encodings[1]);
  299 +var encodings = dm.process('Matrix');
  300 +console.log(encodings[0]);
  301 +console.log(encodings[1]);
  302 +```
242 303
243 304 Attaching will patch String with useful methods:
244 305
245   - metaphone.attach();
  306 +```javascript
  307 +metaphone.attach();
  308 +```
246 309
247 310 `soundsLike` is essentially a shortcut to `Metaphone.compare`:
248 311
249   - if(wordA.soundsLike(wordB))
250   - console.log('they sound alike!');
  312 +```javascript
  313 +if(wordA.soundsLike(wordB))
  314 + console.log('they sound alike!');
  315 +```
251 316
252 317 The raw phonetics are obtained with `phonetics()`:
253 318
254   - console.log('phonetics'.phonetics());
  319 +```javascript
  320 +console.log('phonetics'.phonetics());
  321 +```
255 322
256 323 Full text strings can be tokenized into arrays of phonetics (much like how tokenization-to-arrays works for stemmers):
257 324
258   - console.log('phonetics rock'.tokenizeAndPhoneticize());
  325 +```javascript
  326 +console.log('phonetics rock'.tokenizeAndPhoneticize());
  327 +```
259 328
260 329 Same module operations applied with `SoundEx`:
261 330
262   - if(soundEx.compare(wordA, wordB))
263   - console.log('they sound alike!');
  331 +```javascript
  332 +if(soundEx.compare(wordA, wordB))
  333 + console.log('they sound alike!');
  334 +```
264 335
265 336 The same String patches apply with `soundEx`:
266 337
267   - soundEx.attach();
  338 +```javascript
  339 +soundEx.attach();
268 340
269   - if(wordA.soundsLike(wordB))
270   - console.log('they sound alike!');
271   -
272   - console.log('phonetics'.phonetics());
  341 +if(wordA.soundsLike(wordB))
  342 + console.log('they sound alike!');
273 343
  344 +console.log('phonetics'.phonetics());
  345 +```
274 346
275 347 Inflectors
276 348 ----------
@@ -279,38 +351,52 @@ Inflectors
279 351
280 352 Nouns can be pluralized/singularized with a `NounInflector`:
281 353
282   - var natural = require('natural'),
283   - nounInflector = new natural.NounInflector();
  354 +```javascript
  355 +var natural = require('natural'),
  356 +nounInflector = new natural.NounInflector();
  357 +```
284 358
285 359 To pluralize a word (outputs "radii"):
286 360
287   - console.log(nounInflector.pluralize('radius'));
  361 +```javascript
  362 +console.log(nounInflector.pluralize('radius'));
  363 +```
288 364
289 365 To singularize a word (outputs "beer"):
290 366
291   - console.log(nounInflector.singularize('beers'));
  367 +```javascript
  368 +console.log(nounInflector.singularize('beers'));
  369 +```
292 370
293 371 Like many of the other features, String can be patched to perform the operations
294 372 directly. The "Noun" suffix on the methods is necessary, as verbs will be
295 373 supported in the future.
296 374
297   - nounInflector.attach();
298   - console.log('radius'.pluralizeNoun());
299   - console.log('beers'.singularizeNoun());
  375 +```javascript
  376 +nounInflector.attach();
  377 +console.log('radius'.pluralizeNoun());
  378 +console.log('beers'.singularizeNoun());
  379 +```
300 380
301 381 ### Numbers
302 382
303 383 Numbers can be counted with a CountInflector:
304 384
305   - var countInflector = natural.CountInflector;
  385 +```javascript
  386 +var countInflector = natural.CountInflector;
  387 +```
306 388
307 389 Outputs "1st":
308 390
309   - console.log(countInflector.nth(1));
  391 +```javascript
  392 +console.log(countInflector.nth(1));
  393 +```
310 394
311 395 Outputs "111th":
312 396
313   - console.log(countInflector.nth(111));
  397 +```javascript
  398 +console.log(countInflector.nth(111));
  399 +```
314 400
315 401 ### Present Tense Verbs
316 402
@@ -318,22 +404,30 @@ Present Tense Verbs can be pluralized/singularized with a PresentVerbInflector.
318 404 This feature is still experimental as of 0.0.42, so use with caution, and please
319 405 provide feedback.
320 406
321   - var verbInflector = new natural.PresentVerbInflector();
  407 +```javascript
  408 +var verbInflector = new natural.PresentVerbInflector();
  409 +```
322 410
323 411 Outputs "becomes":
324 412
325   - console.log(verbInflector.singularize('become'));
  413 +```javascript
  414 +console.log(verbInflector.singularize('become'));
  415 +```
326 416
327 417 Outputs "become":
328 418
329   - console.log(verbInflector.pluralize('becomes'));
  419 +```javascript
  420 +console.log(verbInflector.pluralize('becomes'));
  421 +```
330 422
331 423 Like many other natural modules, `attach()` can be used to patch strings with
332 424 handy methods.
333 425
334   - verbInflector.attach();
335   - console.log('walk'.singularizePresentVerb());
336   - console.log('walks'.pluralizePresentVerb());
  426 +```javascript
  427 +verbInflector.attach();
  428 +console.log('walk'.singularizePresentVerb());
  429 +console.log('walks'.pluralizePresentVerb());
  430 +```
337 431
338 432
339 433 N-Grams
@@ -342,32 +436,40 @@ N-Grams
342 436 n-grams can be obtained for either arrays or strings (which will be tokenized
343 437 for you):
344 438
345   - var NGrams = natural.NGrams;
  439 +```javascript
  440 +var NGrams = natural.NGrams;
  441 +```
346 442
347 443 ### bigrams
348 444
349   - console.log(NGrams.bigrams('some words here'));
350   - console.log(NGrams.bigrams(['some', 'words', 'here']));
  445 +```javascript
  446 +console.log(NGrams.bigrams('some words here'));
  447 +console.log(NGrams.bigrams(['some', 'words', 'here']));
  448 +```
351 449
352   -Both of the above output: [ [ 'some', 'words' ], [ 'words', 'here' ] ]
  450 +Both of the above output: `[ [ 'some', 'words' ], [ 'words', 'here' ] ]`
353 451
354 452 ### trigrams
355 453
356   - console.log(NGrams.trigrams('some other words here'));
357   - console.log(NGrams.trigrams(['some', 'other', 'words', 'here']));
  454 +```javascript
  455 +console.log(NGrams.trigrams('some other words here'));
  456 +console.log(NGrams.trigrams(['some', 'other', 'words', 'here']));
  457 +```
358 458
359   -Both of the above output: [ [ 'some', 'other', 'words' ],
360   - [ 'other', 'words', 'here' ] ]
  459 +Both of the above output: `[ [ 'some', 'other', 'words' ],
  460 + [ 'other', 'words', 'here' ] ]`
361 461
362 462 ### arbitrary n-grams
363 463
364   - console.log(NGrams.ngrams('some other words here for you', 4));
365   - console.log(NGrams.ngrams(['some', 'other', 'words', 'here', 'for',
366   - 'you'], 4));
  464 +```javascript
  465 +console.log(NGrams.ngrams('some other words here for you', 4));
  466 +console.log(NGrams.ngrams(['some', 'other', 'words', 'here', 'for',
  467 + 'you'], 4));
  468 +```
367 469
368   -The above outputs: [ [ 'some', 'other', 'words', 'here' ],
  470 +The above outputs: `[ [ 'some', 'other', 'words', 'here' ],
369 471 [ 'other', 'words', 'here', 'for' ],
370   - [ 'words', 'here', 'for', 'you' ] ]
  472 + [ 'words', 'here', 'for', 'you' ] ]`
371 473
372 474 tf-idf
373 475 -----
@@ -377,108 +479,126 @@ document relative to a corpus. The following example will add four documents to
377 479 a corpus and determine the weight of the word "node", then the weight of the
378 480 word "ruby" in each document.
379 481
380   - var natural = require('natural'),
381   - TfIdf = natural.TfIdf,
382   - tfidf = new TfIdf();
  482 +```javascript
  483 +var natural = require('natural'),
  484 + TfIdf = natural.TfIdf,
  485 + tfidf = new TfIdf();
383 486
384   - tfidf.addDocument('this document is about node.');
385   - tfidf.addDocument('this document is about ruby.');
386   - tfidf.addDocument('this document is about ruby and node.');
387   - tfidf.addDocument('this document is about node. it has node examples');
  487 +tfidf.addDocument('this document is about node.');
  488 +tfidf.addDocument('this document is about ruby.');
  489 +tfidf.addDocument('this document is about ruby and node.');
  490 +tfidf.addDocument('this document is about node. it has node examples');
388 491
389   - console.log('node --------------------------------');
390   - tfidf.tfidfs('node', function(i, measure) {
391   - console.log('document #' + i + ' is ' + measure);
392   - });
  492 +console.log('node --------------------------------');
  493 +tfidf.tfidfs('node', function(i, measure) {
  494 + console.log('document #' + i + ' is ' + measure);
  495 +});
393 496
394   - console.log('ruby --------------------------------');
395   - tfidf.tfidfs('ruby', function(i, measure) {
396   - console.log('document #' + i + ' is ' + measure);
397   - });
  497 +console.log('ruby --------------------------------');
  498 +tfidf.tfidfs('ruby', function(i, measure) {
  499 + console.log('document #' + i + ' is ' + measure);
  500 +});
  501 +```
398 502
399 503 The above outputs:
400 504
401   - node --------------------------------
402   - document #0 is 1.4469189829363254
403   - document #1 is 0
404   - document #2 is 1.4469189829363254
405   - document #3 is 2.8938379658726507
406   - ruby --------------------------------
407   - document #0 is 0
408   - document #1 is 1.466337068793427
409   - document #2 is 1.466337068793427
410   - document #3 is 0
  505 +```
  506 +node --------------------------------
  507 +document #0 is 1.4469189829363254
  508 +document #1 is 0
  509 +document #2 is 1.4469189829363254
  510 +document #3 is 2.8938379658726507
  511 +ruby --------------------------------
  512 +document #0 is 0
  513 +document #1 is 1.466337068793427
  514 +document #2 is 1.466337068793427
  515 +document #3 is 0
  516 +```
411 517
412 518 This approach can also be applied to individual documents.
413 519
414 520 The following example measures the term "node" in the first and second documents.
415 521
416   - console.log(tfidf.tfidf('node', 0));
417   - console.log(tfidf.tfidf('node', 1));
  522 +```javascript
  523 +console.log(tfidf.tfidf('node', 0));
  524 +console.log(tfidf.tfidf('node', 1));
  525 +```
418 526
419 527 A TfIdf instance can also load documents from files on disk.
420 528
421   - var tfidf = new TfIdf();
422   - tfidf.addFileSync('data_files/one.txt');
423   - tfidf.addFileSync('data_files/two.txt');
  529 +```javascript
  530 +var tfidf = new TfIdf();
  531 +tfidf.addFileSync('data_files/one.txt');
  532 +tfidf.addFileSync('data_files/two.txt');
  533 +```
424 534
425 535 Multiple terms can be measured as well, with their weights being added into
426 536 a single measure value. The following example determines that the last document
427 537 is the most relevant to the words "node" and "ruby".
428 538
429   - var natural = require('natural'),
430   - TfIdf = natural.TfIdf,
431   - tfidf = new TfIdf();
  539 +```javascript
  540 +var natural = require('natural'),
  541 + TfIdf = natural.TfIdf,
  542 + tfidf = new TfIdf();
432 543
433   - tfidf.addDocument('this document is about node.');
434   - tfidf.addDocument('this document is about ruby.');
435   - tfidf.addDocument('this document is about ruby and node.');
  544 +tfidf.addDocument('this document is about node.');
  545 +tfidf.addDocument('this document is about ruby.');
  546 +tfidf.addDocument('this document is about ruby and node.');
436 547
437   - tfidf.tfidfs('node ruby', function(i, measure) {
438   - console.log('document #' + i + ' is ' + measure);
439   - });
  548 +tfidf.tfidfs('node ruby', function(i, measure) {
  549 + console.log('document #' + i + ' is ' + measure);
  550 +});
  551 +```
440 552
441 553 The above outputs:
442 554
443   - document #0 is 1.2039728043259361
444   - document #1 is 1.2039728043259361
445   - document #2 is 2.4079456086518722
  555 +```
  556 +document #0 is 1.2039728043259361
  557 +document #1 is 1.2039728043259361
  558 +document #2 is 2.4079456086518722
  559 +```
446 560
447 561 The examples above all use strings, which causes natural to automatically tokenize the input.
448 562 If you wish to perform your own tokenization or other kinds of processing, you
449 563 can do so, then pass in the resultant arrays later. This approach allows you to bypass natural's
450 564 default preprocessing.
451 565
452   - var natural = require('natural'),
453   - TfIdf = natural.TfIdf,
454   - tfidf = new TfIdf();
  566 +```javascript
  567 +var natural = require('natural'),
  568 + TfIdf = natural.TfIdf,
  569 + tfidf = new TfIdf();
455 570
456   - tfidf.addDocument(['document', 'about', 'node']);
457   - tfidf.addDocument(['document', 'about', 'ruby']);
458   - tfidf.addDocument(['document', 'about', 'ruby', 'node']);
459   - tfidf.addDocument(['document', 'about', 'node', 'node', 'examples']);
  571 +tfidf.addDocument(['document', 'about', 'node']);
  572 +tfidf.addDocument(['document', 'about', 'ruby']);
  573 +tfidf.addDocument(['document', 'about', 'ruby', 'node']);
  574 +tfidf.addDocument(['document', 'about', 'node', 'node', 'examples']);
460 575
461   - tfidf.tfidfs(['node', 'ruby'], function(i, measure) {
462   - console.log('document #' + i + ' is ' + measure);
463   - });
  576 +tfidf.tfidfs(['node', 'ruby'], function(i, measure) {
  577 + console.log('document #' + i + ' is ' + measure);
  578 +});
  579 +```
464 580
465 581 It's possible to retrieve a list of all terms in a document, sorted by their
466 582 importance.
467 583
468   - tfidf.listTerms(0 /*document index*/).forEach(function(item) {
469   - console.log(item.term + ': ' + item.tfidf);
470   - });
  584 +```javascript
  585 +tfidf.listTerms(0 /*document index*/).forEach(function(item) {
  586 + console.log(item.term + ': ' + item.tfidf);
  587 +});
  588 +```
471 589
472 590 A TfIdf instance can also be serialized and deserialized for save and recall.
473 591
474   - var tfidf = new TfIdf();
475   - tfidf.addDocument('document one', 'un');
476   - tfidf.addDocument('document Two', 'deux');
477   - var s = JSON.stringify(tfidf);
478   - // save "s" to disk, database or otherwise
  592 +```javascript
  593 +var tfidf = new TfIdf();
  594 +tfidf.addDocument('document one', 'un');
  595 +tfidf.addDocument('document Two', 'deux');
  596 +var s = JSON.stringify(tfidf);
  597 +// save "s" to disk, database or otherwise
479 598
480   - // assuming you pulled "s" back out of storage.
481   - var tfidf = new TfIdf(JSON.parse(s));
  599 +// assuming you pulled "s" back out of storage.
  600 +var tfidf = new TfIdf(JSON.parse(s));
  601 +```
482 602
483 603 WordNet
484 604 -------
@@ -496,35 +616,41 @@ and not production-ready. The API is also subject to change.
496 616
497 617 Here's an example of looking up definitions for the word "node".
498 618
499   - var wordnet = new natural.WordNet();
500   -
501   - wordnet.lookup('node', function(results) {
502   - results.forEach(function(result) {
503   - console.log('------------------------------------');
504   - console.log(result.synsetOffset);
505   - console.log(result.pos);
506   - console.log(result.lemma);
507   - console.log(result.synonyms);
508   - console.log(result.pos);
509   - console.log(result.gloss);
510   - });
511   - });
512   -
513   -Given a synset offset and a part of speech, a definition can be looked up directly.
514   -
515   - var wordnet = new natural.WordNet();
  619 +```javascript
  620 +var wordnet = new natural.WordNet();
516 621
517   - wordnet.get(4424418, 'n', function(result) {
  622 +wordnet.lookup('node', function(results) {
  623 + results.forEach(function(result) {
518 624 console.log('------------------------------------');
  625 + console.log(result.synsetOffset);
  626 + console.log(result.pos);
519 627 console.log(result.lemma);
  628 + console.log(result.synonyms);
520 629 console.log(result.pos);
521 630 console.log(result.gloss);
522   - console.log(result.synonyms);
523 631 });
  632 +});
  633 +```
  634 +
  635 +Given a synset offset and a part of speech, a definition can be looked up directly.
  636 +
  637 +```javascript
  638 +var wordnet = new natural.WordNet();
  639 +
  640 +wordnet.get(4424418, 'n', function(result) {
  641 + console.log('------------------------------------');
  642 + console.log(result.lemma);
  643 + console.log(result.pos);
  644 + console.log(result.gloss);
  645 + console.log(result.synonyms);
  646 +});
  647 +```
524 648
525 649 If you have _manually_ downloaded the WordNet database files, you can pass the folder to the constructor:
526 650
527   - var wordnet = new natural.WordNet('/my/wordnet/dict');
  651 +```javascript
  652 +var wordnet = new natural.WordNet('/my/wordnet/dict');
  653 +```
528 654
529 655 As of v0.1.11, WordNet data files are no longer automatically downloaded.
530 656

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.