Skip to content

Commit a337b05

Browse files
trflynn89 authored and linusg committed
LibUnicode: Parse and generate per-locale plural ranges
1 parent a718c62 commit a337b05

File tree

3 files changed

+113
-11
lines changed

3 files changed

+113
-11
lines changed

Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodePluralRules.cpp

Lines changed: 107 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -184,13 +184,22 @@ struct Condition {
184184
Vector<Vector<Relation>> relations;
185185
};
186186

187+
// One parsed plural-range entry: the plural category of the range's start value, the plural
// category of its end value, and the category that applies to the range as a whole. All three
// are kept as strings here; append_ranges() later runs them through format_identifier() to
// build the PluralCategory enumerator names it emits.
struct Range {
188+
String start;
189+
String end;
190+
String category;
191+
};
192+
193+
using Conditions = HashMap<String, Condition>;
194+
using Ranges = Vector<Range>;
195+
187196
struct Locale {
188197
static String generated_method_name(StringView form, StringView locale)
189198
{
190199
return String::formatted("{}_plurality_{}", form, format_identifier({}, locale));
191200
}
192201

193-
HashMap<String, Condition>& rules_for_form(StringView form)
202+
Conditions& rules_for_form(StringView form)
194203
{
195204
if (form == "cardinal")
196205
return cardinal_rules;
@@ -199,8 +208,9 @@ struct Locale {
199208
VERIFY_NOT_REACHED();
200209
}
201210

202-
HashMap<String, Condition> cardinal_rules;
203-
HashMap<String, Condition> ordinal_rules;
211+
Conditions cardinal_rules;
212+
Conditions ordinal_rules;
213+
Ranges plural_ranges;
204214
};
205215

206216
struct UnicodeLocaleData {
@@ -276,7 +286,7 @@ static Relation parse_relation(StringView relation)
276286
//
277287
// The "sample" being series of integer or decimal values that fit the specified condition. The
278288
// condition may be one or more binary expressions, chained together with "and" or "or" operators.
279-
static void parse_condition(StringView category, StringView rule, HashMap<String, Condition>& rules)
289+
static void parse_condition(StringView category, StringView rule, Conditions& rules)
280290
{
281291
static constexpr auto other_category = "other"sv;
282292
static constexpr auto disjunction_keyword = " or "sv;
@@ -348,6 +358,43 @@ static ErrorOr<void> parse_plural_rules(String core_supplemental_path, StringVie
348358
return {};
349359
}
350360

361+
// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Ranges
362+
// Reads "pluralRanges.json" from the given supplemental directory and records every plural range
// it contains on the matching entry of locale_data.locales.
//
// The JSON is walked as supplemental -> plurals -> <locale> -> { <range key>: <category> }. Each
// range key must contain both a "-start-" and an "-end-" segment (this is VERIFY-ed); the text
// between them is the start category and the text after "-end-" is the end category.
// NOTE(review): keys presumably look like "pluralRange-start-one-end-other" - confirm against CLDR.
//
// Returns an error if the JSON file cannot be read; otherwise returns an empty (success) value.
static ErrorOr<void> parse_plural_ranges(String core_supplemental_path, UnicodeLocaleData& locale_data)
363+
{
364+
static constexpr auto start_segment = "-start-"sv;
365+
static constexpr auto end_segment = "-end-"sv;
366+
367+
LexicalPath plural_ranges_path(move(core_supplemental_path));
368+
plural_ranges_path = plural_ranges_path.append("pluralRanges.json"sv);
369+
370+
auto plural_ranges = TRY(read_json_file(plural_ranges_path.string()));
371+
auto const& supplemental_object = plural_ranges.as_object().get("supplemental"sv);
372+
auto const& plurals_object = supplemental_object.as_object().get("plurals"sv);
373+
374+
plurals_object.as_object().for_each_member([&](auto const& loc, auto const& ranges_object) {
375+
// Locales that are not already present in locale_data are silently skipped.
auto locale = locale_data.locales.get(loc);
376+
if (!locale.has_value())
377+
return;
378+
379+
ranges_object.as_object().for_each_member([&](auto const& range, auto const& category) {
380+
auto start_index = range.find(start_segment);
381+
VERIFY(start_index.has_value());
382+
383+
auto end_index = range.find(end_segment);
384+
VERIFY(end_index.has_value());
385+
386+
// Step over the "-start-" marker so start_index points at the first character of the start category.
*start_index += start_segment.length();
387+
388+
// Start category: the text between the two markers. End category: everything after "-end-".
auto start = range.substring(*start_index, *end_index - *start_index);
389+
auto end = range.substring(*end_index + end_segment.length());
390+
391+
locale->plural_ranges.empend(move(start), move(end), category.as_string());
392+
});
393+
});
394+
395+
return {};
396+
}
397+
351398
static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, UnicodeLocaleData& locale_data)
352399
{
353400
auto identity_iterator = TRY(path_to_dir_iterator(move(locale_names_path)));
@@ -378,6 +425,7 @@ static ErrorOr<void> parse_all_locales(String core_path, String locale_names_pat
378425

379426
TRY(parse_plural_rules(core_supplemental_path.string(), "plurals.json"sv, locale_data));
380427
TRY(parse_plural_rules(core_supplemental_path.string(), "ordinals.json"sv, locale_data));
428+
TRY(parse_plural_ranges(core_supplemental_path.string(), locale_data));
381429
return {};
382430
}
383431

@@ -421,12 +469,18 @@ static ErrorOr<void> generate_unicode_locale_implementation(Core::Stream::Buffer
421469
namespace Unicode {
422470
423471
using PluralCategoryFunction = PluralCategory(*)(PluralOperands);
472+
using PluralRangeFunction = PluralCategory(*)(PluralCategory, PluralCategory);
424473
425474
static PluralCategory default_category(PluralOperands)
426475
{
427476
return PluralCategory::Other;
428477
}
429478
479+
static PluralCategory default_range(PluralCategory, PluralCategory end)
480+
{
481+
return end;
482+
}
483+
430484
)~~~");
431485

432486
auto append_rules = [&](auto form, auto const& locale, auto const& rules) {
@@ -459,19 +513,47 @@ static PluralCategory @method@([[maybe_unused]] PluralOperands ops)
459513
)~~~");
460514
};
461515

462-
auto append_lookup_table = [&](auto form) {
516+
// Emits one generated C++ function per locale that maps a (start, end) pair of plural categories
// to the plural category of the whole range. Nothing is emitted for a locale with no ranges.
// The function name comes from Locale::generated_method_name("range", locale). Its body is one
// `if (start == ... && end == ...) return ...;` per parsed Range, followed by a final
// `return end;` for pairs that have no explicit entry.
auto append_ranges = [&](auto const& locale, auto const& ranges) {
517+
if (ranges.is_empty())
518+
return;
519+
520+
generator.set("method"sv, Locale::generated_method_name("range"sv, locale));
521+
522+
generator.append(R"~~~(
523+
static PluralCategory @method@(PluralCategory start, PluralCategory end)
524+
{)~~~");
525+
526+
for (auto const& range : ranges) {
527+
// The parsed category strings are turned into identifiers to fill the PluralCategory::@...@ placeholders below.
generator.set("start"sv, format_identifier({}, range.start));
528+
generator.set("end"sv, format_identifier({}, range.end));
529+
generator.set("category"sv, format_identifier({}, range.category));
530+
531+
generator.append(R"~~~(
532+
if (start == PluralCategory::@start@ && end == PluralCategory::@end@)
533+
return PluralCategory::@category@;)~~~");
534+
}
535+
536+
generator.append(R"~~~(
537+
return end;
538+
}
539+
)~~~");
540+
};
541+
542+
auto append_lookup_table = [&](auto type, auto form, auto default_, auto data_for_locale) {
543+
generator.set("type"sv, type);
463544
generator.set("form"sv, form);
545+
generator.set("default"sv, default_);
464546
generator.set("size"sv, String::number(locales.size()));
465547

466548
generator.append(R"~~~(
467-
static constexpr Array<PluralCategoryFunction, @size@> s_@form@_functions { {)~~~");
549+
static constexpr Array<@type@, @size@> s_@form@_functions { {)~~~");
468550

469551
for (auto const& locale : locales) {
470-
auto& rules = locale_data.locales.find(locale)->value;
552+
auto& rules = data_for_locale(locale_data.locales.find(locale)->value, form);
471553

472-
if (rules.rules_for_form(form).is_empty()) {
554+
if (rules.is_empty()) {
473555
generator.append(R"~~~(
474-
default_category,)~~~");
556+
@default@,)~~~");
475557
} else {
476558
generator.set("method"sv, Locale::generated_method_name(form, locale));
477559
generator.append(R"~~~(
@@ -502,10 +584,12 @@ static constexpr Array<PluralCategory, @size@> @name@ { { PluralCategory::Other)
502584
for (auto [locale, rules] : locale_data.locales) {
503585
append_rules("cardinal"sv, locale, rules.cardinal_rules);
504586
append_rules("ordinal"sv, locale, rules.ordinal_rules);
587+
append_ranges(locale, rules.plural_ranges);
505588
}
506589

507-
append_lookup_table("cardinal"sv);
508-
append_lookup_table("ordinal"sv);
590+
append_lookup_table("PluralCategoryFunction"sv, "cardinal"sv, "default_category"sv, [](auto& rules, auto form) -> Conditions& { return rules.rules_for_form(form); });
591+
append_lookup_table("PluralCategoryFunction"sv, "ordinal"sv, "default_category"sv, [](auto& rules, auto form) -> Conditions& { return rules.rules_for_form(form); });
592+
append_lookup_table("PluralRangeFunction"sv, "range"sv, "default_range"sv, [](auto& rules, auto) -> Ranges& { return rules.plural_ranges; });
509593

510594
generate_mapping(generator, locales, "PluralCategory"sv, "s_cardinal_categories"sv, "s_cardinal_categories_{}", format_identifier,
511595
[&](auto const& name, auto const& locale) {
@@ -559,6 +643,18 @@ Span<PluralCategory const> available_plural_categories(StringView locale, Plural
559643
VERIFY_NOT_REACHED();
560644
}
561645
646+
PluralCategory determine_plural_range(StringView locale, PluralCategory start, PluralCategory end)
647+
{
648+
auto locale_value = locale_from_string(locale);
649+
if (!locale_value.has_value())
650+
return PluralCategory::Other;
651+
652+
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
653+
654+
PluralRangeFunction decider = s_range_functions[locale_index];
655+
return decider(start, end);
656+
}
657+
562658
}
563659
)~~~");
564660

Userland/Libraries/LibUnicode/PluralRules.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,9 @@ Span<PluralCategory const> __attribute__((weak)) available_plural_categories(Str
4040
return categories.span();
4141
}
4242

43+
// Weak fallback definition: ignores the locale and both categories and always returns
// PluralCategory::Other.
// NOTE(review): presumably replaced at link time by the determine_plural_range() that
// GenerateUnicodePluralRules.cpp emits when Unicode data is generated - confirm.
PluralCategory __attribute__((weak)) determine_plural_range(StringView, PluralCategory, PluralCategory)
44+
{
45+
return PluralCategory::Other;
46+
}
47+
4348
}

Userland/Libraries/LibUnicode/PluralRules.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,5 +119,6 @@ constexpr StringView plural_category_to_string(PluralCategory category)
119119

120120
PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands);
121121
Span<PluralCategory const> available_plural_categories(StringView locale, PluralForm form);
122+
PluralCategory determine_plural_range(StringView locale, PluralCategory start, PluralCategory end);
122123

123124
}

0 commit comments

Comments
 (0)