Skip to content

Commit

Permalink
Fixes validation error in several YAML files (#1500)
Browse files Browse the repository at this point in the history
* Fixes validation error in several YAML files

* Correcting YAML definition merge & regen resources.
  • Loading branch information
Justin Wilaby authored and tellarin committed Mar 21, 2019
1 parent 9c1d78e commit bf2e3c6
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ public static class DateTimeDefinitions
public static readonly string FullTextYearRegex = $@"\b((?<firsttwoyearnum>{CenturyRegex})\s+(?<lasttwoyearnum>{LastTwoYearNumRegex})\b|\b(?<firsttwoyearnum>{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}\s+hundred(\s+and)?))\b";
public const string OclockRegex = @"(?<oclock>uur)";
public const string SpecialDescRegex = @"(p\b)";
public const string AmDescRegex = @"(am\b|a\.m\.|a m\b|a\. m\.|a\.m\b|a\. m\b|a m\b)";
public const string PmDescRegex = @"(pm\b|p\.m\.|p\b|p m\b|p\. m\.|p\.m\b|p\. m\b|p m\b)";
public const string AmPmDescRegex = @"(ampm)";
public static readonly string DescRegex = $@"((({OclockRegex}\s+)?(?<desc>ampm|am\b|a\.m\.|a m\b|a\. m\.|a\.m\b|a\. m\b|a m\b|pm\b|p\.m\.|p m\b|p\. m\.|p\.m\b|p\. m\b|p\b|p m\b))|{OclockRegex})";
public static readonly string AmDescRegex = $@"({BaseDateTime.BaseAmDescRegex})";
public static readonly string PmDescRegex = $@"({BaseDateTime.BasePmDescRegex})";
public static readonly string AmPmDescRegex = $@"({BaseDateTime.BaseAmPmDescRegex})";
public static readonly string DescRegex = $@"((({OclockRegex}\s+)?(?<desc>({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex})))|{OclockRegex})";
public static readonly string TwoDigitYearRegex = $@"\b(?<![$])(?<year>([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b";
public static readonly string YearRegex = $@"({BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})";
public const string WeekDayRegex = @"\b(?<weekday>maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag|ma|ma.|di|di.|wo|wo.|woe|woe.|do|do.|vr|vr.|vrij|za|za.|zat|zat.|zo|zo.)s?\b";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,6 @@ public static class TimeZoneDefinitions
{ @"et", -240 },
{ @"eat", 180 },
{ @"eet", 120 },
{ @"eest", 180 },
{ @"esat", -180 },
{ @"esast", -180 },
{ @"est", -300 },
Expand Down Expand Up @@ -388,6 +387,7 @@ public static class TimeZoneDefinitions
{ @"isdt", 120 },
{ @"jst", 540 },
{ @"jdt", 120 },
{ @"eest", 180 },
{ @"pett", 720 },
{ @"kst", -10000 },
{ @"lint", 840 },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ public static class DateTimeDefinitions
{
public const string MonthRegex = @"(?<month>正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)";
public const string MonthRegexForPeriod = @"(?<month>正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)(?=\b|t|まで|から)?";
public const string DayRegex = @"(?<day>01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)日?";
public const string MonthNumRegexForPeriod = @"(?<month>01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)(?=\b|t|まで|から)?";
public const string DayRegex = @"(?<day>01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)日?";
public const string DayRegexForPeriod = @"(?<day>01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)日?(?=\b|t|まで|から)?";
public const string DateDayRegexInJapanese = @"(?<day>初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号)";
public const string DayRegexNumInJapanese = @"(?<day>一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|廿|卅)";
Expand Down
2 changes: 1 addition & 1 deletion Patterns/Base-DateTime.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
HourRegex: !simpleRegex
HourRegex: !simpleRegex
def: (?<hour>00|01|02|03|04|05|06|07|08|09|0|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|1|2|3|4|5|6|7|8|9)(h)?
TwoDigitHourRegex: !simpleRegex
def: (?<hour>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24)(h)?
Expand Down
24 changes: 3 additions & 21 deletions Patterns/Dutch/Dutch-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,11 @@ MonthOfRegex: !simpleRegex
def: (maand)(\s*)(van)
MonthRegex: !simpleRegex
def: (?<month>januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|december|jan|feb|mar|apr|jun|jul|aug|sep|sept|oct|okt|nov|dec)
#Cases collected from mined data
# Cases collected from mined data
AmbiguousMonthP0Regex: !simpleRegex
def: \b((^may i)|(i|you|he|she|we|they)\s+may|(may\s+((((also|not|(also not)|well)\s+)?(be|contain|constitute|email|e-mail|take|have|result|involve|get|work|reply))|(or may not))))\b
AmDescRegex: !simpleRegex
def: (am\b|a\.m\.|a m\b|a\. m\.|a\.m\b|a\. m\b|a m\b)
PmDescRegex: !simpleRegex
def: (pm\b|p\.m\.|p\b|p m\b|p\. m\.|p\.m\b|p\. m\b|p m\b)
#This is a look-behind assertion. Some cases should extract two digits as year like 11/25/16, where 16 means 2016.
#The assertion determines if not connected with am/pm or hour separator (:), which should be a time.
# This is a negative lookahead assertion. Some cases should extract two digits as a year, like 11/25/16, where 16 means 2016.
# The assertion checks that the two digits are not followed by am/pm or an hour separator (:), in which case they should be treated as a time.
DateYearRegex: !nestedRegex
def: (?<year>((1\d|20)\d{2})|2100|(([0-27-9]\d)\b(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex})))))
references: [ AmDescRegex, PmDescRegex ]
Expand Down Expand Up @@ -287,11 +283,6 @@ WeekDayEnd: !nestedRegex
references: [ WeekDayRegex ]
RangeUnitRegex: !simpleRegex
def: \b(?<unit>jaren|jaar|maanden|maand|weken|week)\b
OclockRegex: !simpleRegex
def: (?<oclock>uur)
DescRegex: !nestedRegex
def: ((({OclockRegex}\s+)?(?<desc>ampm|am\b|a\.m\.|a m\b|a\. m\.|a\.m\b|a\. m\b|a m\b|pm\b|p\.m\.|p m\b|p\. m\.|p\.m\b|p\. m\b|p\b|p m\b))|{OclockRegex})
references: [ OclockRegex ]
HourNumRegex: !simpleRegex
def: \b(?<hournum>nul|een|één|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|tweeentwintig|drieentwintig|vierentwintig)\b
MinuteNumRegex: !simpleRegex
Expand Down Expand Up @@ -507,10 +498,6 @@ HolidayRegex2: !nestedRegex
HolidayRegex3: !nestedRegex # -dag suffix
def: (?<holiday>(valentijns|valetijns|nieuwjaars|prinsjes|konings|koninginne|bevrijdings|hemelvaarts|eerste kerst|1e kerst|tweede kerst|2e kerst|vaders|vader|moeders|moeder|meisjes|amerikaanse onafhankelijkheids|onafhankelijkheids|dankzeggings|bosmarmotten|inauguratie|boomplant|boomfeest|vrijgezellen|nederlandse veteranen|veteranen)\s*(dag))(\s+(van\s+|in\s+)?({YearRegex}|{RelativeRegex}\s+jaar))?
references: [ YearRegex, RelativeRegex ]
DateTokenPrefix: 'op '
TimeTokenPrefix: 'om '
TokenBeforeDate: 'op '
TokenBeforeTime: 'om '
AMTimeRegex: !simpleRegex
def: (?<am>'s morgens)
PMTimeRegex: !simpleRegex
Expand All @@ -537,8 +524,6 @@ WithinNextPrefixRegex: !nestedRegex
def: \b(in(\s+de)?(\s+(?<next>{NextPrefixRegex}))?)\b
references: [ NextPrefixRegex ]
# The "next" group here is used to detect invalid cases like "within the next 5 days before today"
AmPmDescRegex: !simpleRegex
def: (ampm)
MorningStartEndRegex: !nestedRegex
def: (^('s morgens|in de morgen|{AmDescRegex}))|(('s morgens|in de morgen|{AmDescRegex})$)
references: [ AmDescRegex ]
Expand Down Expand Up @@ -949,10 +934,7 @@ DayOfMonth: !dictionary
'19e': 19
'20e': 20
'21e': 21
'21e': 21
'22e': 22
'22e': 22
'23e': 23
'23e': 23
'24e': 24
'25e': 25
Expand Down
2 changes: 0 additions & 2 deletions Patterns/English/English-TimeZone.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,6 @@ AbbrToMinMapping: !dictionary
eat: 180
eet: 120
#eet: -10000
eest: 180
esat: -180
esast: -180
est: -300
Expand Down Expand Up @@ -633,7 +632,6 @@ FullToMinMapping: !dictionary
western europe summer time: 60
w. europe summer time: 60
western european summer time: 60
western europe summer time: 60
west europe summer time: 60
west asia standard time: 300
west pacific standard time: 600
Expand Down
11 changes: 1 addition & 10 deletions Patterns/German/German-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ TwoDigitYearRegex: !nestedRegex
def: \b(?<![$])(?<year>([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b
references: [ AmDescRegex, PmDescRegex]
FullTextYearRegex: !nestedRegex
def: \b((?<firsttwoyearnum>{CenturyRegex})\s+(?<lasttwoyearnum>((null|zwanzig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)\s+{WrittenNumRegex})|{WrittenNumRegex}))\b|\b(?<firsttwoyearnum>{CenturyRegex})\b
def: \b((?<firsttwoyearnum>{CenturyRegex})\s+(?<lasttwoyearnum>((zwanzig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)\s+{WrittenNumRegex})|{WrittenNumRegex}))\b|\b(?<firsttwoyearnum>{CenturyRegex})\b
references: [ CenturyRegex, WrittenNumRegex ]
YearRegex: !nestedRegex
def: ({BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})
Expand Down Expand Up @@ -534,9 +534,6 @@ DecadeWithCenturyRegex: !nestedRegex
RelativeDecadeRegex: !nestedRegex
def: \b((das|die\s+)?{RelativeRegex}\s+((?<number>[\w,]+)\s+)?jahrzehnte?)\b
references: [ RelativeRegex ]
FullTextYearRegex: !nestedRegex
def: \b((?<firsttwoyearnum>{CenturyRegex})\s+(?<lasttwoyearnum>((zwanzig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)\s+{WrittenNumRegex})|{WrittenNumRegex}))\b|\b(?<firsttwoyearnum>{CenturyRegex})\b
references: [ CenturyRegex, WrittenNumRegex ]
YearSuffix: !nestedRegex
def: (,?\s*({DateYearRegex}|{FullTextYearRegex}))
references: [ DateYearRegex, FullTextYearRegex ]
Expand Down Expand Up @@ -856,10 +853,7 @@ DayOfMonth: !dictionary
'9. ': 9
'10. ': 10
'11. ': 11
'11. ': 11
'12. ': 12
'12. ': 12
'13. ': 13
'13. ': 13
'14. ': 14
'15. ': 15
Expand Down Expand Up @@ -890,10 +884,7 @@ DayOfMonth: !dictionary
'9': 9
'10': 10
'11': 11
'11': 11
'12': 12
'12': 12
'13': 13
'13': 13
'14': 14
'15': 15
Expand Down
53 changes: 26 additions & 27 deletions Patterns/Japanese/Japanese-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@ MonthRegex: !simpleRegex
def: (?<month>正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)
MonthRegexForPeriod: !simpleRegex
def: (?<month>正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)(?=\b|t|まで|から)?
DayRegex: !simpleRegex
MonthNumRegexForPeriod: !simpleRegex
def: (?<month>01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)(?=\b|t|まで|から)?
DayRegex: !simpleRegex
def: (?<day>01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)日?
DayRegexForPeriod: !simpleRegex
DayRegexForPeriod: !simpleRegex
def: (?<day>01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)日?(?=\b|t|まで|から)?
DateDayRegexInJapanese: !simpleRegex
def: (?<day>初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号)
Expand Down Expand Up @@ -58,8 +57,8 @@ DateLastRe: !simpleRegex
def: 上个|上一个|上|上一|去
DateNextRe: !simpleRegex
def: 下个|下一个|下|下一|明
SpecialDate: !nestedRegex
def: (?<thisyear>({DateThisRe}|{DateLastRe}|{DateNextRe})年)?(?<thismonth>({DateThisRe}|{DateLastRe}|{DateNextRe})月)?{DateDayRegexInJapanese}
SpecialDate: !nestedRegex
def: (?<thisyear>({DateThisRe}|{DateLastRe}|{DateNextRe})年)?(?<thismonth>({DateThisRe}|{DateLastRe}|{DateNextRe})月)?{DateDayRegexInJapanese}
references: [DateThisRe, DateLastRe, DateNextRe, DateDayRegexInJapanese]
DateUnitRegex: !simpleRegex
def: (?<unit>年|个月|周|日|天)
Expand All @@ -68,12 +67,12 @@ BeforeRegex: !simpleRegex
AfterRegex: !simpleRegex
def: 以后|以後|之后|之後|后|後
DateRegexList1: !nestedRegex
# 2016年12月1日
def: ({YearRegex}[/\\\-]?{MonthRegex}[/\\\-]?{DayRegexForPeriod}\s*({WeekDayRegex})?)
# 2016年12月1日
def: ({YearRegex}[/\\\-]?{MonthRegex}[/\\\-]?{DayRegexForPeriod}\s*({WeekDayRegex})?)
references: [ MonthRegex, DayRegexForPeriod, YearRegex, WeekDayRegex ]
DateRegexList2: !nestedRegex
# 2016年12月
def: ({YearRegex}{MonthRegexForPeriod}\s*)
# 2016年12月
def: ({YearRegex}{MonthRegexForPeriod}\s*)
references: [ MonthRegexForPeriod, YearRegex ]
DateRegexList3: !nestedRegex
def: ((({YearRegex}|{DateYearInJapaneseRegex})年)(\s*))?({LunarRegex}(\s*))?{MonthRegex}(\s*)({DateDayRegexInJapanese}|{DayRegex})((\s*|,|,){WeekDayRegex})?({BeforeRegex}|{AfterRegex})?
Expand All @@ -99,16 +98,16 @@ DateRegexList8: !nestedRegex
def: '{YearNumRegex}\s*[/\\\-\. ]\s*{MonthNumRegex}\s*[/\\\-\. ]\s*{DayRegexForPeriod}'
references: [YearNumRegex, MonthNumRegex, DayRegexForPeriod]
DateRegexList9: !nestedRegex
# 12月/1日
def: (\s*{MonthRegex}[/\\\-]?{DayRegexForPeriod}\s*({WeekDayRegex})?)
# 12月/1日
def: (\s*{MonthRegex}[/\\\-]?{DayRegexForPeriod}\s*({WeekDayRegex})?)
references: [ MonthRegex, DayRegexForPeriod, WeekDayRegex ]
DateRegexList10: !nestedRegex
# 2016/12/23
def: ({YearRegex}[/\\\-]{MonthNumRegex}[/\\\-]{DayRegexForPeriod})
# 2016/12/23
def: ({YearRegex}[/\\\-]{MonthNumRegex}[/\\\-]{DayRegexForPeriod})
references: [ MonthNumRegex, DayRegexForPeriod, YearRegex ]
DateRegexList11: !nestedRegex
# 2016/12
def: ({YearRegex}[/\\\-]{MonthNumRegexForPeriod})
# 2016/12
def: ({YearRegex}[/\\\-]{MonthNumRegexForPeriod})
references: [ MonthNumRegexForPeriod, YearRegex ]
# Note that these "Till" connectors can be used without any suffix like "之间|之内|期间|中间|间"
# DatePeriodExtractorJap
Expand Down Expand Up @@ -589,19 +588,19 @@ ParserConfigurationDayOfMonth: !dictionary
ParserConfigurationDayOfWeek: !dictionary
types: [string, int]
entries:
月曜: 1
火曜: 2
水曜: 3
木曜: 4
金曜: 5
土曜: 6
日曜: 0
月曜日: 1
火曜日: 2
水曜日: 3
木曜日: 4
金曜日: 5
土曜日: 6
月曜: 1
火曜: 2
水曜: 3
木曜: 4
金曜: 5
土曜: 6
日曜: 0
月曜日: 1
火曜日: 2
水曜日: 3
木曜日: 4
金曜日: 5
土曜日: 6
日曜日: 0
ParserConfigurationMonthOfYear: !dictionary
types: [string, int]
Expand Down
2 changes: 1 addition & 1 deletion Patterns/Spanish/Spanish-DateTime.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
---
---
TillRegex: !simpleRegex
def: (?<till>hasta|al|a|--|-|—|——)(\s+(el|la(s)?))?
AndRegex: !simpleRegex
Expand Down

0 comments on commit bf2e3c6

Please sign in to comment.