@@ -483,14 +483,17 @@ Optional<LocaleID> parse_unicode_locale_id(StringView locale)
483
483
static void perform_hard_coded_key_value_substitutions (String& key, String& value)
484
484
{
485
485
// FIXME: In the XML export of CLDR, there are some aliases defined in the following files:
486
+ // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml
486
487
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/collation.xml
487
488
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/measure.xml
488
489
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/timezone.xml
489
490
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/transform.xml
490
491
//
491
492
// There doesn't seem to be a counterpart in the JSON export. Since there aren't many such
492
493
// aliases, until an XML parser is implemented, those aliases are implemented here.
493
- if (key.is_one_of (" kb" sv, " kc" sv, " kh" sv, " kk" sv, " kn" sv) && (value == " yes" sv)) {
494
+ if ((key == " ca" sv) && (value == " islamicc" sv)) {
495
+ value = " islamic-civil" sv;
496
+ } else if (key.is_one_of (" kb" sv, " kc" sv, " kh" sv, " kk" sv, " kn" sv) && (value == " yes" sv)) {
494
497
value = " true" sv;
495
498
} else if (key == " ks" sv) {
496
499
if (value == " primary" sv)
@@ -540,6 +543,20 @@ static void perform_hard_coded_key_value_substitutions(String& key, String& valu
540
543
}
541
544
}
542
545
546
+ static void perform_hard_coded_key_multi_value_substitutions (String const & key, Vector<String>& values)
547
+ {
548
+ // Similar to perform_hard_coded_key_value_substitutions, some aliases depend on multiple
549
+ // variants being present in the original locale. Those are canonicalized separately here.
550
+ // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml
551
+ if ((key != " ca" sv) || (values.size () != 3 ))
552
+ return ;
553
+
554
+ static Vector<String> ethiopic_amete_alem { " ethiopic" sv, " amete" sv, " alem" sv };
555
+
556
+ if (values == ethiopic_amete_alem)
557
+ values = { " ethioaa" sv };
558
+ }
559
+
543
560
static void transform_unicode_locale_id_to_canonical_syntax (LocaleID& locale_id)
544
561
{
545
562
auto canonicalize_language = [](LanguageID& language_id, bool force_lowercase) {
@@ -626,6 +643,8 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
626
643
627
644
values.append (move (value));
628
645
}
646
+
647
+ perform_hard_coded_key_multi_value_substitutions (key, values);
629
648
};
630
649
631
650
canonicalize_language (locale_id.language_id , false );
@@ -644,22 +663,22 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
644
663
for (auto & extension : locale_id.extensions ) {
645
664
extension.visit (
646
665
[&](LocaleExtension& ext) {
647
- quick_sort (ext.attributes );
648
- quick_sort (ext.keywords , [](auto const & a, auto const & b) { return a.key < b.key ; });
649
-
650
666
for (auto & attribute : ext.attributes )
651
667
attribute = attribute.to_lowercase ();
652
668
for (auto & keyword : ext.keywords )
653
669
canonicalize_key_value_list (keyword.key , keyword.types , true );
670
+
671
+ quick_sort (ext.attributes );
672
+ quick_sort (ext.keywords , [](auto const & a, auto const & b) { return a.key < b.key ; });
654
673
},
655
674
[&](TransformedExtension& ext) {
656
675
if (ext.language .has_value ())
657
676
canonicalize_language (*ext.language , true );
658
677
659
- quick_sort (ext.fields , [](auto const & a, auto const & b) { return a.key < b.key ; });
660
-
661
678
for (auto & field : ext.fields )
662
679
canonicalize_key_value_list (field.key , field.values , false );
680
+
681
+ quick_sort (ext.fields , [](auto const & a, auto const & b) { return a.key < b.key ; });
663
682
},
664
683
[&](OtherExtension& ext) {
665
684
ext.key = static_cast <char >(to_ascii_lowercase (ext.key ));
0 commit comments