Skip to content

Commit

Permalink
T handling
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisumbel committed Feb 14, 2012
1 parent 257eb7f commit 18c4aff
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 2 deletions.
20 changes: 19 additions & 1 deletion lib/natural/phonetics/double_metaphone.js
Expand Up @@ -123,8 +123,26 @@ function process(token, spyCallback) {
}

/**
 * Encodes the letter 'T' found at `token[pos]`.
 *
 * Works on the enclosing scope's `token` and `pos`, and emits codes through
 * the enclosing scope's `add`, `addSecondary` and `addCompressedDouble`
 * helpers. `pos` is advanced past any extra letters consumed here.
 * NOTE(review): the caller presumably advances `pos` by one more for the 'T'
 * itself - confirm against the main loop.
 *
 * Rules, in priority order:
 *   - "TION"            -> 'XN' in both encodings
 *   - "TIA" / "TCH"     -> 'X' in both encodings
 *   - "TH" / "TTH"      -> 'T' when followed by "OM"/"AM", or when the word
 *                          starts with "VAN ", "VON " or "SCH";
 *                          otherwise '0' primary and 'T' secondary
 *   - anything else     -> 'T' via addCompressedDouble, skipping a directly
 *                          following 'D'
 */
function handleT() {
    if(token.substring(pos + 1, pos + 4) === 'ION') {
        add('XN');
        pos += 3;
    } else if(subMatch(1, 3, ['IA', 'CH'])) {
        add('X');
        pos += 2;
    } else if(token[pos + 1] === 'H'
            || token.substring(pos + 1, pos + 3) === 'TH') {
        // The "TTH" check must be relative to pos. The previous
        // token.substring(1, 2) == 'TH' compared one character against two
        // and therefore could never be true.
        if(subMatch(2, 4, ['OM', 'AM'])
                || ['VAN ', 'VON '].indexOf(token.substring(0, 4)) > -1
                || token.substring(0, 3) === 'SCH') {
            // Hard TH, as in the 'thomas' spec.
            add('T');
        } else {
            // Soft TH: '0' in the primary encoding, 'T' in the secondary.
            addSecondary('0', 'T');
        }

        pos++;
    } else {
        addCompressedDouble('T');

        // "TD" is encoded as a single 'T'; skip the 'D'.
        if(token[pos + 1] === 'D') {
            pos++;
        }
    }
}

Expand Down
35 changes: 34 additions & 1 deletion spec/double_metaphone_spec.js
Expand Up @@ -257,9 +257,42 @@ describe('double metaphone', function() {
});

// Specs for how double metaphone encodes the letter T. Each spec checks both
// the primary (encodings[0]) and secondary (encodings[1]) encoding.
describe('T', function() {
    it('should encode TION to XN', function() {
        var encodings = doubleMetaphone.process('nation');
        expect(encodings[0]).toMatch(/.*XN$/);
        expect(encodings[1]).toMatch(/.*XN$/);
    });

    it('should encode CH sounds to X', function() {
        var encodings = doubleMetaphone.process('thatch');
        expect(encodings[0]).toMatch(/.*X$/);
        expect(encodings[1]).toMatch(/.*X$/);
    });

    it('should encode hard TH to T', function() {
        var encodings = doubleMetaphone.process('thomas');
        expect(encodings[0]).toMatch(/^T.*/);
        expect(encodings[1]).toMatch(/^T.*/);
    });

    it('should encode soft TH to 0,T', function() {
        var encodings = doubleMetaphone.process('this');
        expect(encodings[0]).toMatch(/^0.*/);
        expect(encodings[1]).toMatch(/^T.*/);
    });

    it('should encode TT to T', function() {
        var encodings = doubleMetaphone.process('matta');
        expect(encodings[0]).toMatch(/[^T]T/);
        expect(encodings[1]).toMatch(/[^T]T/);
        // The positive match alone also passes for an uncompressed "TT",
        // so assert that no doubled T remains.
        expect(encodings[0]).toNotMatch(/TT/);
        expect(encodings[1]).toNotMatch(/TT/);
    });

    it('should encode TD to T', function() {
        var encodings = doubleMetaphone.process('countdown');
        expect(encodings[0]).toContain('T');
        expect(encodings[0]).toNotContain('D');
        expect(encodings[1]).toContain('T');
        expect(encodings[1]).toNotContain('D');
    });
});

Expand Down

0 comments on commit 18c4aff

Please sign in to comment.