Skip to content

Commit

Permalink
Merge pull request #99 from gursevak/malformed-ascii-corrections
Browse files Browse the repository at this point in the history
Updated Unicode conversion to handle common ASCII ordering typos
  • Loading branch information
irvanjitsingh committed Apr 14, 2021
2 parents bd87261 + 5cf43d6 commit 9e29a65
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 3 deletions.
9 changes: 9 additions & 0 deletions src/__tests__/unicode.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,15 @@ describe('unicode', () => {
/* expect(unicode('lY AkR¨r kau Awpny sMg kihXo Ary kwnRh khMŧ kau pDwirXo ]'))
.toBe('ਲੈ ਅਕ੍ਰੂਰ ਕਉ ਆਪਨੇ ਸੰਗ ਕਹਿਯੋ ਅਰੇ ਕਾਨ੍ਰਹ ਕਹੰŧ ਕਉ ਪਧਾਰਿਯੋ ॥'); */

expect(unicode('kW@n'))
.toBe('ਕੑਾਂਨ');

expect(unicode('s`uD ispwh durMq dubwh su swj snwh durjwn dlYNgy ]'))
.toBe('ਸੁੱਧ ਸਿਪਾਹ ਦੁਰੰਤ ਦੁਬਾਹ ਸੁ ਸਾਜ ਸਨਾਹ ਦੁਰਜਾਨ ਦਲੈਂਗੇ ॥');

expect(unicode('sq`Rün ko pl mo bD kIE ]386]'))
.toBe('ਸਤ੍ਰੁੱਨ ਕੋ ਪਲ ਮੋ ਬਧ ਕੀਓ ॥੩੮੬॥');

expect(unicode('slok mÚ 3 ]'))
.toBe('ਸਲੋਕ ਮਃ ੩ ॥');

Expand Down
43 changes: 40 additions & 3 deletions src/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,38 @@ const reverseMapping = {
'': '₆',
};

// Two-character input sequences, each written in its expected order.
// unicode() reverses every entry to find the swapped (mistyped) form in the
// input and rewrites it back to the entry. Entries are applied one after
// another in array order, so keep the order stable when adding new pairs.
const asciiCorrections = [
  // pairs that start with '@'
  '@W', '@w', '@o', '@O', '@y', '@Y', '@ü', '@`',
  // pair that starts with 'Í'
  'ÍY',
  // pairs that start with 'R' (the 'R`' pair is listed further down)
  'Ry', 'RY', 'RM', 'RN',
  // pairs that end with 'N' or 'M'
  'YN', 'yN', 'YM', 'yM',
  'uN', 'UN', 'üN', 'uM', 'UM', 'üM',
  // pairs that end with '`'
  'R`', 'u`', 'U`', 'ü`',
  // pairs that start with 'I'
  'Iˆ', 'IN',
];

const halfChars = [
'H',
'R',
Expand Down Expand Up @@ -381,11 +413,16 @@ function unicode(text = '', reverse = false, simplify = false) {

let convertedText = '';

const chars = text
let str = text
.replace(/>/gi, '')
.replace(/Ø/gi, '')
.replace(/Æ/g, '')
.split('');
.replace(/Æ/g, '');

asciiCorrections.forEach((e) => {
str = str.replace(new RegExp(e.split('').reverse().join(''), 'g'), e);
});

const chars = str.split('');

for (let j = 0; j < chars.length; j += 1) {
const currentChar = chars[j];
Expand Down

0 comments on commit 9e29a65

Please sign in to comment.