Merge pull request #79 from phlipper/patch-1

add GitHub Flavored Markdown to README
commit 970b5bfca41ad4ed84af980777590c5b0e8e8d19 (parents 7e749c4 + 38254c1)
Chris Umbel authored December 02, 2012

Showing 1 changed file with 335 additions and 209 deletions.

README.md (rendered)

Tokenizers
----------

Word, Regexp, and Treebank tokenizers are provided for breaking text up into
arrays of tokens:

```javascript
var natural = require('natural'),
  tokenizer = new natural.WordTokenizer();
console.log(tokenizer.tokenize("your dog has flees."));
// [ 'your', 'dog', 'has', 'flees' ]
```

The other tokenizers follow a similar pattern:

```javascript
tokenizer = new natural.TreebankWordTokenizer();
console.log(tokenizer.tokenize("my dog hasn't any flees."));
// [ 'my', 'dog', 'has', 'n\'t', 'any', 'flees', '.' ]

tokenizer = new natural.RegexpTokenizer({pattern: /\-/});
console.log(tokenizer.tokenize("flee-dog"));
// [ 'flee', 'dog' ]

tokenizer = new natural.WordPunctTokenizer();
console.log(tokenizer.tokenize("my dog hasn't any flees."));
// [ 'my',  'dog',  'hasn',  '\'',  't',  'any',  'flees',  '.' ]
```

String Distance
----------------------

Natural provides an implementation of the Jaro-Winkler string distance algorithm.
It returns a number between 0 and 1 indicating how closely the strings match (0 = no match at all, 1 = an exact match):

```javascript
var natural = require('natural');
console.log(natural.JaroWinklerDistance("dixon","dicksonx"));
console.log(natural.JaroWinklerDistance('not', 'same'));
```

Output:

```
0.7466666666666666
0
```

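As a sanity check on the "1 = exact match" end of the scale, comparing a string with itself should score 1 (a minimal sketch, not taken from the original examples):

```javascript
// Identical strings should score a full match of 1.
var natural = require('natural');
console.log(natural.JaroWinklerDistance('same', 'same'));
// expected: 1
```
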
Natural also offers support for Levenshtein distances:

```javascript
var natural = require('natural');
console.log(natural.LevenshteinDistance("ones","onez"));
console.log(natural.LevenshteinDistance('one', 'one'));
```

Output:

```
2
0
```

The costs of the three edit operations (insertion, deletion, and substitution) are modifiable for Levenshtein:

```javascript
console.log(natural.LevenshteinDistance("ones","onez", {
    insertion_cost: 1,
    deletion_cost: 1,
    substitution_cost: 1
}));
```

Output:

```
1
```

(Note that with the default costs the "ones"/"onez" distance above is 2 rather than 1, which suggests substitutions are weighted more heavily than single insertions or deletions by default.)

And Dice's coefficient:

```javascript
var natural = require('natural');
console.log(natural.DiceCoefficient('thing', 'thing'));
console.log(natural.DiceCoefficient('not', 'same'));
```

Output:

```
1
0
```

Stemmers
--------

Currently, stemming is supported via the Porter (English, Russian, and Spanish) and Lancaster (Paice/Husk)
algorithms.

```javascript
var natural = require('natural');
```

This example uses a Porter stemmer. "word" is returned.

```javascript
console.log(natural.PorterStemmer.stem("words")); // stem a single word
```

In Russian:

```javascript
console.log(natural.PorterStemmerRu.stem("падший"));
```

In Spanish:

```javascript
console.log(natural.PorterStemmerEs.stem("jugaría"));
```

`attach()` patches `stem()` and `tokenizeAndStem()` to String as a shortcut to
`PorterStemmer.stem(token)`. `tokenizeAndStem()` breaks text up into single words
and returns an array of stemmed tokens.

```javascript
natural.PorterStemmer.attach();
console.log("i am waking up to the sounds of chainsaws".tokenizeAndStem());
console.log("chainsaws".stem());
```

The same thing can be done with a Lancaster stemmer:

```javascript
natural.LancasterStemmer.attach();
console.log("i am waking up to the sounds of chainsaws".tokenizeAndStem());
console.log("chainsaws".stem());
```

Classifiers
----------------------

Two classifiers are currently supported: Naive Bayes and logistic regression.
The following examples use the BayesClassifier class, but the
LogisticRegressionClassifier class could be substituted instead.

```javascript
var natural = require('natural'),
  classifier = new natural.BayesClassifier();
```

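To use logistic regression instead, only the constructor changes; the training and classification calls shown below work the same way (a minimal sketch, not part of the original examples):

```javascript
// Same addDocument()/train()/classify() API, different underlying model.
var lrClassifier = new natural.LogisticRegressionClassifier();
```
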
You can train the classifier on sample text. It will use reasonable defaults to
tokenize and stem the text.

```javascript
classifier.addDocument('i am long qqqq', 'buy');
classifier.addDocument('buy the q\'s', 'buy');
classifier.addDocument('short gold', 'sell');
classifier.addDocument('sell gold', 'sell');

classifier.train();
```

Outputs "sell":

```javascript
console.log(classifier.classify('i am short silver'));
```

Outputs "buy":

```javascript
console.log(classifier.classify('i am long copper'));
```

You also have access to the set of matched classes and the associated value from the classifier. This:

```javascript
console.log(classifier.getClassifications('i am long copper'));
```

outputs:

```javascript
[ { label: 'sell', value: 0.39999999999999997 },
  { label: 'buy', value: 0.19999999999999998 } ]
```

The classifier can also be trained with, and can classify, arrays of tokens, strings, or
any mixture of the two. Arrays let you use entirely custom data with your own
tokenization/stemming, if you choose to implement it.

```javascript
classifier.addDocument(['sell', 'gold'], 'sell');
```

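Classification accepts a pre-tokenized array as well (a small sketch reusing the documents added above; the expected label is an assumption, not output captured from the README):

```javascript
// Classify a pre-tokenized array instead of a raw string.
console.log(classifier.classify(['sell', 'gold'])); // presumably 'sell'
```
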
A classifier can also be persisted and recalled so that you can reuse it later.

```javascript
classifier.save('classifier.json', function(err, classifier) {
    // the classifier is saved to the classifier.json file!
});
```

To recall from the classifier.json saved above:

```javascript
natural.BayesClassifier.load('classifier.json', null, function(err, classifier) {
    console.log(classifier.classify('long SUNW'));
    console.log(classifier.classify('short SUNW'));
});
```

A classifier can also be serialized and deserialized like so:

```javascript
var classifier = new natural.BayesClassifier();
classifier.addDocument(['sell', 'gold'], 'sell');
classifier.addDocument(['buy', 'silver'], 'buy');

// serialize
var raw = JSON.stringify(classifier);
// deserialize
var restoredClassifier = natural.BayesClassifier.restore(JSON.parse(raw));
console.log(restoredClassifier.classify('i should sell that'));
```

Phonetics
---------

Phonetic (sounds-like) matching can be done with the SoundEx,
Metaphone, or DoubleMetaphone algorithms:

```javascript
var natural = require('natural'),
    metaphone = natural.Metaphone, soundEx = natural.SoundEx;

var wordA = 'phonetics';
var wordB = 'fonetix';
```

To test whether the two words sound alike:

```javascript
if(metaphone.compare(wordA, wordB))
    console.log('they sound alike!');
```

The raw phonetics are obtained with `process()`:

```javascript
console.log(metaphone.process('phonetics'));
```

A maximum code length can be supplied:

```javascript
console.log(metaphone.process('phonetics', 3));
```

`DoubleMetaphone` deals with two encodings returned in an array. This
feature is experimental and subject to change:

```javascript
var natural = require('natural'),
  dm = natural.DoubleMetaphone;

var encodings = dm.process('Matrix');
console.log(encodings[0]);
console.log(encodings[1]);
```

Attaching will patch String with useful methods:

```javascript
metaphone.attach();
```

`soundsLike` is essentially a shortcut to `Metaphone.compare`:

```javascript
if(wordA.soundsLike(wordB))
    console.log('they sound alike!');
```

The raw phonetics are obtained with `phonetics()`:

```javascript
console.log('phonetics'.phonetics());
```

Full text strings can be tokenized into arrays of phonetics (much like how tokenization-to-arrays works for stemmers):

```javascript
console.log('phonetics rock'.tokenizeAndPhoneticize());
```

The same module operations apply with `SoundEx`:

```javascript
if(soundEx.compare(wordA, wordB))
    console.log('they sound alike!');
```

The same String patches apply with `soundEx`:

```javascript
soundEx.attach();

if(wordA.soundsLike(wordB))
    console.log('they sound alike!');

console.log('phonetics'.phonetics());
```

Inflectors
----------

### Nouns

Nouns can be pluralized/singularized with a `NounInflector`:

```javascript
var natural = require('natural'),
    nounInflector = new natural.NounInflector();
```

To pluralize a word (outputs "radii"):

```javascript
console.log(nounInflector.pluralize('radius'));
```

To singularize a word (outputs "beer"):

```javascript
console.log(nounInflector.singularize('beers'));
```

Like many of the other features, String can be patched to perform the operations
directly. The "Noun" suffix on the methods is necessary, as verbs will be
supported in the future.

```javascript
nounInflector.attach();
console.log('radius'.pluralizeNoun());
console.log('beers'.singularizeNoun());
```

### Numbers

Numbers can be inflected to their ordinal form ("1st", "111th", etc.) with a CountInflector:

```javascript
var countInflector = natural.CountInflector;
```

Outputs "1st":

```javascript
console.log(countInflector.nth(1));
```

Outputs "111th":

```javascript
console.log(countInflector.nth(111));
```

### Present Tense Verbs

Present tense verbs can be pluralized/singularized with a PresentVerbInflector.
This feature is still experimental as of 0.0.42, so use with caution, and please
provide feedback.

```javascript
var verbInflector = new natural.PresentVerbInflector();
```

Outputs "becomes":

```javascript
console.log(verbInflector.singularize('become'));
```

Outputs "become":

```javascript
console.log(verbInflector.pluralize('becomes'));
```

Like many other natural modules, `attach()` can be used to patch strings with
handy methods.

```javascript
verbInflector.attach();
console.log('walk'.singularizePresentVerb());
console.log('walks'.pluralizePresentVerb());
```

N-Grams
-------

n-grams can be obtained for either arrays or strings (which will be tokenized
for you):

```javascript
var NGrams = natural.NGrams;
```

### bigrams

```javascript
console.log(NGrams.bigrams('some words here'));
console.log(NGrams.bigrams(['some',  'words',  'here']));
```

Both of the above output: `[ [ 'some', 'words' ], [ 'words', 'here' ] ]`

### trigrams

```javascript
console.log(NGrams.trigrams('some other words here'));
console.log(NGrams.trigrams(['some',  'other', 'words',  'here']));
```

Both of the above output: `[ [ 'some', 'other', 'words' ], [ 'other', 'words', 'here' ] ]`

### arbitrary n-grams

```javascript
console.log(NGrams.ngrams('some other words here for you', 4));
console.log(NGrams.ngrams(['some', 'other', 'words', 'here', 'for',
    'you'], 4));
```

The above outputs: `[ [ 'some', 'other', 'words', 'here' ], [ 'other', 'words', 'here', 'for' ], [ 'words', 'here', 'for', 'you' ] ]`

tf-idf
-----

Tf-idf (term frequency–inverse document frequency) measures how important a word is to a
document relative to a corpus. The following example will add four documents to
a corpus and determine the weight of the word "node", then the weight of the
word "ruby" in each document.

```javascript
var natural = require('natural'),
    TfIdf = natural.TfIdf,
    tfidf = new TfIdf();

tfidf.addDocument('this document is about node.');
tfidf.addDocument('this document is about ruby.');
tfidf.addDocument('this document is about ruby and node.');
tfidf.addDocument('this document is about node. it has node examples');

console.log('node --------------------------------');
tfidf.tfidfs('node', function(i, measure) {
    console.log('document #' + i + ' is ' + measure);
});

console.log('ruby --------------------------------');
tfidf.tfidfs('ruby', function(i, measure) {
    console.log('document #' + i + ' is ' + measure);
});
```

The above outputs:

```
node --------------------------------
document #0 is 1.4469189829363254
document #1 is 0
document #2 is 1.4469189829363254
document #3 is 2.8938379658726507
ruby --------------------------------
document #0 is 0
document #1 is 1.466337068793427
document #2 is 1.466337068793427
document #3 is 0
```

This approach can also be applied to individual documents.

The following example measures the term "node" in the first and second documents.

```javascript
console.log(tfidf.tfidf('node', 0));
console.log(tfidf.tfidf('node', 1));
```

A TfIdf instance can also load documents from files on disk.

```javascript
var tfidf = new TfIdf();
tfidf.addFileSync('data_files/one.txt');
tfidf.addFileSync('data_files/two.txt');
```

Multiple terms can be measured as well, with their weights being added into
a single measure value. The following example determines that the last document
is the most relevant to the words "node" and "ruby".

```javascript
var natural = require('natural'),
    TfIdf = natural.TfIdf,
    tfidf = new TfIdf();

tfidf.addDocument('this document is about node.');
tfidf.addDocument('this document is about ruby.');
tfidf.addDocument('this document is about ruby and node.');

tfidf.tfidfs('node ruby', function(i, measure) {
    console.log('document #' + i + ' is ' + measure);
});
```

The above outputs:

```
document #0 is 1.2039728043259361
document #1 is 1.2039728043259361
document #2 is 2.4079456086518722
```

The examples above all use strings, which causes natural to tokenize the input automatically.
If you wish to perform your own tokenization or other kinds of processing, you
can do so, then pass in the resultant arrays later. This approach allows you to bypass natural's
default preprocessing.

```javascript
var natural = require('natural'),
    TfIdf = natural.TfIdf,
    tfidf = new TfIdf();

tfidf.addDocument(['document', 'about', 'node']);
tfidf.addDocument(['document', 'about', 'ruby']);
tfidf.addDocument(['document', 'about', 'ruby', 'node']);
tfidf.addDocument(['document', 'about', 'node', 'node', 'examples']);

tfidf.tfidfs(['node', 'ruby'], function(i, measure) {
    console.log('document #' + i + ' is ' + measure);
});
```

It's possible to retrieve a list of all terms in a document, sorted by their
importance.

```javascript
tfidf.listTerms(0 /*document index*/).forEach(function(item) {
    console.log(item.term + ': ' + item.tfidf);
});
```

A TfIdf instance can also be serialized and deserialized for save and recall.

```javascript
var tfidf = new TfIdf();
tfidf.addDocument('document one', 'un');
tfidf.addDocument('document Two', 'deux');
var s = JSON.stringify(tfidf);
// save "s" to disk, database or otherwise

// assuming you pulled "s" back out of storage.
var tfidf = new TfIdf(JSON.parse(s));
```

WordNet
-------

Natural's WordNet integration is to be considered experimental and not production-ready. The API is also subject to change.

Here's an example of looking up definitions for the word "node".

```javascript
var wordnet = new natural.WordNet();

wordnet.lookup('node', function(results) {
    results.forEach(function(result) {
        console.log('------------------------------------');
        console.log(result.synsetOffset);
        console.log(result.pos);
        console.log(result.lemma);
        console.log(result.synonyms);
        console.log(result.pos);
        console.log(result.gloss);
    });
});
```

Given a synset offset and a part of speech, a definition can be looked up directly.

```javascript
var wordnet = new natural.WordNet();

wordnet.get(4424418, 'n', function(result) {
    console.log('------------------------------------');
    console.log(result.lemma);
    console.log(result.pos);
    console.log(result.gloss);
    console.log(result.synonyms);
});
```

If you have _manually_ downloaded the WordNet database files, you can pass the folder to the constructor:

```javascript
var wordnet = new natural.WordNet('/my/wordnet/dict');
```

As of v0.1.11, WordNet data files are no longer automatically downloaded.