Permalink
Browse files

Merge pull request #292 from jdesboeufs/master

Add missing Latin diacritic (Œ/œ ligature)
  • Loading branch information...
2 parents c6e1a5e + 502d1f2 commit 5bbff4217b26cfaa1082cded2ae00e34b48c0bc6 @kkoch986 kkoch986 committed Jun 4, 2016
Showing with 4 additions and 2 deletions.
  1. +2 −0 lib/natural/normalizers/remove_diacritics.js
  2. +2 −2 spec/remove_diacritics_spec.js
View
2 lib/natural/normalizers/remove_diacritics.js
@@ -52,6 +52,7 @@ var diacriticsRemovalMap = [
{'base':'NJ','letters':/[\u01CA]/g},
{'base':'Nj','letters':/[\u01CB]/g},
{'base':'O', 'letters':/[\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C]/g},
+ {'base':'OE','letters':/[\u0152]/g},
{'base':'OI','letters':/[\u01A2]/g},
{'base':'OO','letters':/[\uA74E]/g},
{'base':'OU','letters':/[\u0222]/g},
@@ -93,6 +94,7 @@ var diacriticsRemovalMap = [
{'base':'n', 'letters':/[\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5]/g},
{'base':'nj','letters':/[\u01CC]/g},
{'base':'o', 'letters':/[\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275]/g},
+ {'base':'oe','letters':/[\u0153]/g},
{'base':'oi','letters':/[\u01A3]/g},
{'base':'ou','letters':/[\u0223]/g},
{'base':'oo','letters':/[\uA74F]/g},
View
4 spec/remove_diacritics_spec.js
@@ -25,7 +25,7 @@ var removeDiacritics = require("../lib/natural/normalizers/remove_diacritics.js"
describe('remove_diacritics', function() {
it('should correctly remove diacritics', function () {
- var original_phrase = 'piñon ça va über résumé';
- expect(removeDiacritics(original_phrase)).toEqual('pinon ca va uber resume');
+ var original_phrase = 'piñon ça va über résumé œdipe';
+ expect(removeDiacritics(original_phrase)).toEqual('pinon ca va uber resume oedipe');
});
});

0 comments on commit 5bbff42

Please sign in to comment.