Skip to content

Commit

Permalink
Merge pull request #28 from sidred123/master
Browse files Browse the repository at this point in the history
Module for Levenshtein Distance
  • Loading branch information
chrisumbel committed Mar 22, 2012
2 parents 81ac9c6 + 9002aa4 commit 1ac29c3
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 1 deletion.
41 changes: 41 additions & 0 deletions lib/natural/distance/levenshtein_distance.js
@@ -0,0 +1,41 @@
/**
 * Compute the weighted Levenshtein (minimum edit) distance needed to turn
 * `source` into `target`.
 * Algorithm based on the minimum edit distance procedure in
 * Speech and Language Processing - Daniel Jurafsky and James H. Martin.
 *
 * @param {string|Array} source - Sequence to transform; elements are read by
 *     index and compared with `!==`.
 * @param {string|Array} target - Sequence to transform `source` into.
 * @param {Object} [options] - Optional edit costs. The object is not modified.
 * @param {number} [options.insertion_cost=1] - Cost of adding one element of
 *     `target` that has no counterpart in `source`.
 * @param {number} [options.deletion_cost=1] - Cost of dropping one element of
 *     `source`.
 * @param {number} [options.substitution_cost=2] - Cost of replacing one
 *     element with a different one (matching elements cost nothing).
 * @returns {number} The minimum total cost of the edits.
 */
function LevenshteinDistance (source, target, options) {
    // Accept any real number (including an explicit 0); anything else falls
    // back to the default so a missing or malformed cost cannot poison the sums.
    function costOrDefault (value, fallback) {
        return (typeof value === 'number' && !isNaN(value)) ? value : fallback;
    }

    // Costs are copied into locals so the caller's options object is left untouched.
    options = options || {};
    var insertionCost = costOrDefault(options.insertion_cost, 1);
    var deletionCost = costOrDefault(options.deletion_cost, 1);
    var substitutionCost = costOrDefault(options.substitution_cost, 2);

    var sourceLength = source.length;
    var targetLength = target.length;
    var row;
    var column;

    // distanceMatrix[row][column] is the cheapest way to turn the first `row`
    // elements of source into the first `column` elements of target.
    var distanceMatrix = [[0]];

    // First column: an empty target is reached by deleting every source element.
    for (row = 1; row <= sourceLength; row++) {
        distanceMatrix[row] = [];
        distanceMatrix[row][0] = distanceMatrix[row - 1][0] + deletionCost;
    }

    // First row: an empty source becomes the target purely through insertions.
    // Each cell builds on its left neighbour within row 0.
    for (column = 1; column <= targetLength; column++) {
        distanceMatrix[0][column] = distanceMatrix[0][column - 1] + insertionCost;
    }

    for (row = 1; row <= sourceLength; row++) {
        for (column = 1; column <= targetLength; column++) {
            // Moving down a row consumes a source element (deletion); moving
            // right a column produces a target element (insertion).
            var costToDelete = distanceMatrix[row - 1][column] + deletionCost;
            var costToInsert = distanceMatrix[row][column - 1] + insertionCost;

            // The diagonal step is free when the two elements already match.
            var costToSubstitute = distanceMatrix[row - 1][column - 1];
            if (source[row - 1] !== target[column - 1]) {
                costToSubstitute += substitutionCost;
            }

            distanceMatrix[row][column] = Math.min(costToInsert, costToDelete, costToSubstitute);
        }
    }

    return distanceMatrix[sourceLength][targetLength];
}

// Export the distance function itself as this module's public interface.
module.exports = LevenshteinDistance;
3 changes: 2 additions & 1 deletion lib/natural/index.js
Expand Up @@ -39,4 +39,5 @@ exports.TfIdf = require('./tfidf/tfidf');
exports.SentenceAnalyzer = require('./analyzers/sentence_analyzer');
exports.stopwords = require('./util/stopwords').words;
exports.NGrams = require('./ngrams/ngrams');
exports.JaroWinklerDistance = require('./distance/jaro-winkler_distance');
exports.JaroWinklerDistance = require('./distance/jaro-winkler_distance');
exports.LevenshteinDistance = require('./distance/levenshtein_distance');

0 comments on commit 1ac29c3

Please sign in to comment.