Skip to content

Commit

Permalink
added X handling
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisumbel committed Feb 13, 2012
1 parent 85109cf commit dd4c205
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 26 deletions.
61 changes: 37 additions & 24 deletions lib/natural/phonetics/double_metaphone.js
Expand Up @@ -54,6 +54,30 @@ function process(token, spyCallback) {
add(encoded || c);
}

/**
 * Encodes the letter D found at `pos` in `token`.
 *
 * - "DG" followed by I, E or Y adds 'J' and advances `pos` by two.
 * - Any other "DG" adds 'TK' and advances `pos` by one.
 * - "DT" adds a single 'T' and advances `pos` by one.
 * - Everything else is delegated to addCompressedDouble('D', 'T').
 *
 * Reads and updates the enclosing `token` / `pos` state and emits codes
 * through `add`. NOTE(review): the caller presumably advances `pos` once
 * more after this handler returns - confirm against the main loop.
 */
function handleD() {
    var pair = token.substring(pos, pos + 2);

    switch(pair) {
    case 'DG':
        var following = token[pos + 2];

        if(following === 'I' || following === 'E' || following === 'Y') {
            // soft G, as in "edge"
            add('J');
            pos += 2;
        } else {
            // hard G, as in "Edgar"
            add('TK');
            pos += 1;
        }
        break;
    case 'DT':
        // the pair is emitted as one 'T'
        add('T');
        pos += 1;
        break;
    default:
        addCompressedDouble('D', 'T');
    }
}

/**
 * Encodes the letter H found at `pos` in `token`.
 *
 * 'H' is emitted (and `pos` advanced by one) only when the H is followed
 * by a vowel and is either the first character of the token or preceded
 * by a vowel. In every other case nothing is added and `pos` is untouched.
 */
function handleH() {
    // must open the word or come right after a vowel
    var leftOk = pos === 0 || isVowel(token[pos - 1]);
    if(!leftOk)
        return;

    // must also be followed by a vowel
    if(!isVowel(token[pos + 1]))
        return;

    add('H');
    pos += 1;
}

function handleL() {
if(token[pos + 1] == 'L') {
if(pos == token.length - 3 && (
Expand Down Expand Up @@ -90,14 +114,6 @@ function process(token, spyCallback) {
}
}

/**
 * Encodes the letter H found at `pos` in `token`.
 *
 * 'H' is emitted (and `pos` advanced by one) only when the H is followed
 * by a vowel and is either the first character of the token or preceded
 * by a vowel. In every other case nothing is added and `pos` is untouched.
 */
function handleH() {
    // must open the word or come right after a vowel
    var leftOk = pos === 0 || isVowel(token[pos - 1]);
    if(!leftOk)
        return;

    // must also be followed by a vowel
    if(!isVowel(token[pos + 1]))
        return;

    add('H');
    pos += 1;
}

function handleR() {
if(pos == token.length - 1 && !slavoGermanic
&& token.substring(pos - 2, pos) == 'IE'
Expand All @@ -107,28 +123,22 @@ function process(token, spyCallback) {
addCompressedDouble('R');
}

function handleD() {
if(token.substring(pos, pos + 2) == 'DG') {
if(['I', 'E', 'Y'].indexOf(token[pos + 2]) > -1) {
add('J');
pos += 2;
} else {
add('TK');
pos++;
}
} else if(token.substring(pos, pos + 2) == 'DT') {
add('T');
pos++;
} else
addCompressedDouble('D', 'T');
/**
 * Encodes the letter X found at `pos` in `token`.
 *
 * - At the start of the token, X is encoded as 'S'.
 * - As the last character, X is dropped when the three letters before it
 *   are IAU, EAU or IEU, or the two letters before it are AU or OU
 *   (French-style endings such as "lemieux").
 * - Otherwise X is encoded as 'KS'.
 *
 * `pos` is not modified here.
 */
function handleX() {
    if(pos === 0) {
        add('S');
        return;
    }

    var atEnd = pos === token.length - 1;

    if(atEnd) {
        var lastThree = token.substring(pos - 3, pos);
        var lastTwo = token.substring(pos - 2, pos);
        var silent = lastThree === 'IAU' || lastThree === 'EAU' || lastThree === 'IEU'
            || lastTwo === 'AU' || lastTwo === 'OU';

        // trailing X after these vowel groups is not pronounced
        if(silent)
            return;
    }

    add('KS');
}

function handleZ() {
if(token[pos + 1] == 'H') {
add('J');
pos++;
} else if(['ZO', 'ZI', 'ZA'].indexOf(token.substring(pos + 1, pos + 3)) > -1 ||
(slavoGermanic && pos > 0 && token[pos - 1] != 'T')) {
} else if(['ZO', 'ZI', 'ZA'].indexOf(token.substring(pos + 1, pos + 3)) > -1
|| (slavoGermanic && pos > 0 && token[pos - 1] != 'T')) {
addSecondary('S', 'TS');
pos++;
} else
Expand Down Expand Up @@ -186,6 +196,9 @@ function process(token, spyCallback) {
case 'V':
addCompressedDouble('V', 'F');
break;
case 'X':
handleX();
break;
case 'Z':
handleZ();
break;
Expand Down
24 changes: 22 additions & 2 deletions spec/double_metaphone_spec.js
Expand Up @@ -249,7 +249,7 @@ describe('double metaphone', function() {
expect(encodings[1]).toMatch(/^R.*/);
});

it('should ignore trailing french Rs', function() {
it('should ignore trailing French Rs', function() {
var encodings = doubleMetaphone.process('papier');
expect(encodings[0]).toMatch(/.*[^R]$/);
expect(encodings[1]).toMatch(/.*R$/);
Expand All @@ -274,14 +274,34 @@ describe('double metaphone', function() {
});
});

// Specs for the X rules. Each spec checks both entries (index 0 and 1) of
// the array returned by doubleMetaphone.process().
describe('X', function() {
    // A leading X must be encoded as S in both encodings.
    it('should encode X as S at start', function() {
        var encodings = doubleMetaphone.process('xenophobia');
        expect(encodings[0]).toMatch(/^S.*/);
        expect(encodings[1]).toMatch(/^S.*/);
    });

    // A trailing X in an ordinary word must be encoded as KS.
    it('should encode X as KS at end for non-French words', function() {
        var encodings = doubleMetaphone.process('box');
        expect(encodings[0]).toMatch(/.*KS$/);
        expect(encodings[1]).toMatch(/.*KS$/);
    });

    // A trailing X after a French-style vowel group must not produce KS.
    // Uses the chainable `.not` form instead of the legacy `toNotMatch`
    // matcher, which newer Jasmine releases no longer provide.
    it('should skip X end for French words', function() {
        var encodings = doubleMetaphone.process('lemieux');
        expect(encodings[0]).not.toMatch(/.*KS$/);
        expect(encodings[1]).not.toMatch(/.*KS$/);
    });
});

describe('Z', function() {
it('should encode Z to S', function() {
var encodings = doubleMetaphone.process('zookeeper');
expect(encodings[0]).toMatch(/^S.*$/);
expect(encodings[1]).toMatch(/^S.*$/);
});

it('should encode chinese ZH to J', function() {
it('should encode Chinese ZH to J', function() {
var encodings = doubleMetaphone.process('zheng');
expect(encodings[0]).toMatch(/^J.*$/);
expect(encodings[1]).toMatch(/^J.*$/);
Expand Down

0 comments on commit dd4c205

Please sign in to comment.