Skip to content
This repository has been archived by the owner on May 10, 2023. It is now read-only.

Commit

Permalink
fix: add sentence validator for Cantonese (#605)
Browse files Browse the repository at this point in the history
  • Loading branch information
laubonghaudoi authored Feb 15, 2022
1 parent a5cf871 commit 18fa4b2
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
2 changes: 2 additions & 0 deletions server/lib/validation/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const ru = require('./languages/ru');
const th = require('./languages/th');
const ur = require('./languages/ur');
const uz = require('./languages/uz');
const yue = require('./languages/yue');

const VALIDATORS = {
bas,
Expand All @@ -27,6 +28,7 @@ const VALIDATORS = {
th,
ur,
uz,
yue,
};

module.exports = {
Expand Down
34 changes: 34 additions & 0 deletions server/lib/validation/languages/yue.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Minimum number of characters that qualify as a sentence.
const MIN_LENGTH = 3;

// Maximum number of characters allowed per sentence to keep recordings in a manageable duration.
const MAX_LENGTH = 50;

/**
 * Validation rules for Cantonese (yue) sentences.
 *
 * Every entry has an `error` message (in Cantonese) and exactly one check:
 *   - `fn`:    predicate taking the sentence string; returns true when the sentence breaks the rule.
 *   - `regex`: pattern describing content that is not allowed.
 *
 * NOTE(review): presumably the shared validator that loads this module reports
 * `error` when `fn` returns true or `regex` matches; confirm against the caller.
 */
const INVALIDATIONS = [{
  fn: (sentence) => {
    // Count code points rather than UTF-16 code units: Cantonese text regularly
    // uses characters outside the BMP (CJK Extension B, e.g. U+282E2), and each
    // of those would otherwise be counted as two characters.
    const { length } = [...sentence];
    return length < MIN_LENGTH || length > MAX_LENGTH;
  },
  // The two bounds need a separator, otherwise they render fused together ("350").
  error: `字數必須要喺 ${MIN_LENGTH} 至 ${MAX_LENGTH} 之間`,
}, {
  // Arabic numerals are rejected.
  regex: /[0-9]+/,
  error: "句子唔可以包含阿拉伯數字",
}, {
  // ASCII symbols that are rejected: < > + * # @ % ^ [ ] ( ) /
  regex: /[<>+*#@%^[\]()/]/,
  error: "句子唔可以有特殊符號",
}, {
  // 7 or more repeating characters in a row is likely a non-formal spelling or difficult to read.
  // The `u` flag makes `.` match a whole code point, so repeated non-BMP characters are caught too.
  regex: /(.)\1{6}/u,
  error: "唔可以有連續 7 個或以上重複字元",
}, {
  // Emoji range from https://www.regextester.com/106421 and
  // https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
  // NOTE(review): [\u2000-\u3300] also covers CJK punctuation such as U+3002 (。) and
  // U+3001 (、), while the last rule below treats U+3002 as an expected sentence
  // terminator. Confirm whether rejecting that punctuation here is intended.
  regex: /(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/,
  error: "句子唔可以含有 emoji 或者其他特殊 Unicode 符號",
}, {
  // Mandarin sentence-final particles (U+5427 吧, U+5504 唄, U+5436 吶) directly followed by
  // whitespace, a full stop or comma (full-width or ASCII), or the end of the sentence.
  regex: /[\u5427\u5504\u5436](\s|\u3002|\u002E|\uFF0C|\u002C|$)/,
  error: '句子唔可以有官話語氣詞(例如吧、唄、吶)',
}];

// Public surface of this module: only the list of invalidation rules is exported.
module.exports = { INVALIDATIONS };

0 comments on commit 18fa4b2

Please sign in to comment.