Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding custom objects for freqdist and idf
- Loading branch information
Alexander Behrens
committed
Apr 11, 2016
1 parent
3152af6
commit 501d146
Showing
8 changed files
with
341 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
/** | ||
* Create a FreqDist object. | ||
*/ | ||
|
||
|
||
'use strict'; | ||
|
||
|
||
/**
 * Build a frequency distribution: a table mapping each token to the number
 * of times it has been added.
 *
 * @param {*} [_text] Optional initial content, forwarded to `add` when truthy.
 * @returns {{add: Function, merge: Function, count: Function, range: Function,
 *            size: Function, tokens: Function}} the distribution object.
 */
function FreqDist(_text) {

    // Prototype-less dictionary: tokens such as "hasOwnProperty" or
    // "__proto__" must be stored as ordinary keys and must not collide with
    // anything inherited from Object.prototype.
    var _cache = Object.create(null);
    // Lazily computed views of _cache; all are dropped by _reset().
    var _ordered;
    var _size;
    var _total;
    var _tokens;

    // Built before the initial add() so that add() can always return it.
    var self = {
        add: add,
        merge: merge,
        count: count,
        range: range,
        size: size,
        tokens: tokens
    };

    if (_text) {
        add(_text);
    }

    /**
     * Own-property test that also works for prototype-less objects and for
     * primitives (which are boxed by `call`).
     */
    function _has(target, key) {
        return Object.prototype.hasOwnProperty.call(target, key);
    }

    /**
     * Add tokens to the distribution.
     *
     * @param {*} _text A single token, an array of tokens, or an object with
     *   an own `merge` function (e.g. another FreqDist) whose counts are
     *   folded into this one. `null`/`undefined` is ignored.
     * @param {number} [_count=1] Amount added per token occurrence; not used
     *   when merging another distribution.
     * @returns {Object} this distribution, to allow chaining.
     */
    function add(_text, _count) {
        if (_text === undefined || _text === null) {
            return self;
        }
        _reset();

        if (_has(_text, 'merge') && typeof _text.merge === 'function') {
            // The other distribution adds its counts into our table and
            // hands the same table back.
            _cache = _text.merge(_cache);
            return self;
        }

        var increment = (typeof _count === 'undefined') ? 1 : _count;
        [].concat(_text).forEach(function addToken(_token) {
            if (!_has(_cache, _token)) {
                _cache[_token] = 0;
            }
            _cache[_token] += increment;
        });
        return self;
    }

    /**
     * Add every count of this distribution into `external`.
     *
     * @param {Object} external Token -> count dictionary; it is mutated.
     * @returns {Object} the same `external` object.
     */
    function merge(external) {
        for (var key in _cache) {
            if (!_has(external, key)) {
                external[key] = 0;
            }
            external[key] += _cache[key];
        }
        return external;
    }

    /**
     * @param {*} [token] Token to look up.
     * @returns {number} the count of `token` (0 when unknown) or, when no
     *   token is given, the sum of all counts.
     */
    function count(token) {
        // Explicit undefined/null test so that falsy tokens such as '' or 0
        // are looked up instead of being mistaken for "no argument".
        if (token !== undefined && token !== null) {
            return _has(_cache, token) ? _cache[token] : 0;
        }
        if (typeof _total === 'undefined') {
            _total = Object.keys(_cache).reduce(function (sum, key) {
                return sum + _cache[key];
            }, 0);
        }
        return _total;
    }

    /**
     * Entries `{name, count, tf}` sorted by ascending count, where `tf` is
     * the token's count divided by the sum of all counts.
     *
     * @param {number} [start=0] First index (inclusive).
     * @param {number} [end=size()] Last index (exclusive).
     * @returns {Array<Object>} the requested slice of the ordered entries.
     */
    function range(start, end) {
        start = (typeof start !== 'undefined') ? start : 0;
        end = (typeof end !== 'undefined') ? end : size();
        if (typeof _ordered === 'undefined') {
            var total = count();
            _ordered = tokens().map(function (token) {
                return {
                    name: token,
                    count: _cache[token],
                    tf: _cache[token] / total
                };
            });
            // Subtraction yields 0 for ties, keeping the comparator consistent.
            _ordered.sort(function (a, b) {
                return a.count - b.count;
            });
        }
        return _ordered.slice(start, end);
    }

    /** @returns {number} the number of distinct tokens. */
    function size() {
        if (typeof _size === 'undefined') {
            _size = Object.keys(_cache).length;
        }
        return _size;
    }

    /** @returns {Array<string>} the distinct tokens (cached until the next add). */
    function tokens() {
        if (typeof _tokens === 'undefined') {
            _tokens = Object.keys(_cache);
        }
        return _tokens;
    }

    /** Drop every lazily computed view; called whenever the table changes. */
    function _reset() {
        _total = undefined;
        _ordered = undefined;
        _size = undefined;
        _tokens = undefined;
    }

    return self;

}
|
||
module.exports = FreqDist; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/** | ||
* Create an Idf object. | ||
*/ | ||
|
||
|
||
'use strict'; | ||
|
||
|
||
// Load FreqDist from its own file instead of through the directory index:
// the index requires this module while it is still loading, so reading
// `.FreqDist` from its partially filled exports can yield undefined.
var FreqDist = require('./freqDist.js');
|
||
|
||
/**
 * Build an inverse-document-frequency table. Each added document
 * contributes 1 to the document frequency of every distinct token it holds.
 *
 * @param {*} [freqDists] Optional initial documents, forwarded to `add`
 *   when truthy.
 * @returns {{add: Function, get: Function, range: Function, size: Function,
 *            tokens: Function}} the idf object.
 */
function Idf(freqDists) {

    // token -> number of documents containing it. Prototype-less so that
    // tokens like "hasOwnProperty" or "__proto__" are ordinary keys.
    var _cache = Object.create(null);
    // Lazily computed views; all are dropped by _reset().
    var _ordered;
    var _size;
    var _tokens;
    // Number of documents added so far.
    var documents = 0;

    // Built before the initial add() so that add() can always return it.
    var self = {
        add: add,
        get: get,
        range: range,
        size: size,
        tokens: tokens
    };

    if (freqDists) {
        add(freqDists);
    }

    /** Own-property test that also works on prototype-less objects. */
    function _has(target, key) {
        return Object.prototype.hasOwnProperty.call(target, key);
    }

    /**
     * Add one document or an array of documents.
     *
     * @param {*} freqDists A document or array of documents. A document
     *   that has an own `tokens` function is used as is; anything else is
     *   passed to `FreqDist` (taken from the enclosing module scope) first.
     *   Because the argument goes through `[].concat`, a bare array is read
     *   as a list of documents, not as the tokens of one document.
     * @returns {Object} this idf object, to allow chaining.
     */
    function add(freqDists) {
        _reset();
        [].concat(freqDists).forEach(function (freqDist) {
            documents += 1;
            var isDist = freqDist !== undefined && freqDist !== null &&
                _has(freqDist, 'tokens') && typeof freqDist.tokens === 'function';
            var _freqDist = isDist ? freqDist : FreqDist(freqDist);
            _freqDist.tokens().forEach(function (token) {
                if (!_has(_cache, token)) {
                    _cache[token] = 0;
                }
                _cache[token] += 1;
            });
        });
        return self;
    }

    /**
     * @param {string} token
     * @returns {number} ln(documents / document frequency of token), or 0
     *   when the token has never been seen.
     */
    function get(token) {
        if (!_has(_cache, token)) {
            return 0;
        }
        return Math.log(documents / _cache[token]);
    }

    /**
     * Entries `{name, count, idf}` sorted by ascending document frequency.
     *
     * @param {number} [start=0] First index (inclusive).
     * @param {number} [end=size()] Last index (exclusive, as in
     *   `Array.prototype.slice`), so the default covers every entry.
     * @returns {Array<Object>} the requested slice of the ordered entries.
     */
    function range(start, end) {
        start = (typeof start !== 'undefined') ? start : 0;
        end = (typeof end !== 'undefined') ? end : size();
        if (typeof _ordered === 'undefined') {
            _ordered = tokens().map(function (token) {
                return {
                    name: token,
                    count: _cache[token],
                    idf: get(token)
                };
            });
            // Subtraction yields 0 for ties, keeping the comparator consistent.
            _ordered.sort(function (a, b) {
                return a.count - b.count;
            });
        }
        return _ordered.slice(start, end);
    }

    /** @returns {number} the number of distinct tokens. */
    function size() {
        if (typeof _size === 'undefined') {
            _size = tokens().length;
        }
        return _size;
    }

    /** @returns {Array<string>} the distinct tokens (cached until the next add). */
    function tokens() {
        if (typeof _tokens === 'undefined') {
            _tokens = Object.keys(_cache);
        }
        return _tokens;
    }

    /**
     * Drop every lazily computed view. `_ordered` must go too: its idf
     * values depend on `documents`, which changes on every add.
     */
    function _reset() {
        _ordered = undefined;
        _size = undefined;
        _tokens = undefined;
    }

    return self;

}
|
||
module.exports = Idf; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
// FreqDist is registered before Idf so that a module which reads `.FreqDist`
// from this index while the index is still loading (a circular require)
// finds it already defined.
module.exports.FreqDist = require('./freqDist.js');
module.exports.Idf = require('./idf.js');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.