diff --git a/charclass_invlists.h b/charclass_invlists.h index 6c4fd15e9d1b..6b9677893986 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -419900,7 +419900,7 @@ static const U8 WB_table[23][23] = { * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * e1bf3b84f01cb4fec63c65f71c1a1ea1af644d7c142810d17497ff8a23b781c7 lib/unicore/mktables + * 497b589915a64625b274215e8b8ca02b7051ecbd4dd85a488807e76a41bc707d lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl * 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index eb7fbd8f3539..5e2aa8b86a14 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -1596,45 +1596,9 @@ is(@list, 0, "prop_invmap('Perl_Charnames') returns since internal-Perl- @list = prop_invmap("Is_Is_Any"); is(@list, 0, "prop_invmap('Is_Is_Any') returns since two is's"); -# The files for these properties are not used by Perl, but are retained for -# backwards compatibility with applications that read them directly, with -# comments in them that their use is deprecated. Until such time as we remove -# them completely, we test that they exist, are correct, and that their -# formats haven't changed. This hash contains the info needed to test them as -# if they were regular properties. 'replaced_by' gives the equivalent -# property now used by Perl. 
-my %legacy_props = ( - Legacy_Case_Folding => { replaced_by => 'cf', - file => 'To/Fold', - swash_name => 'ToFold' - }, - Legacy_Lowercase_Mapping => { replaced_by => 'lc', - file => 'To/Lower', - swash_name => 'ToLower' - }, - Legacy_Titlecase_Mapping => { replaced_by => 'tc', - file => 'To/Title', - swash_name => 'ToTitle' - }, - Legacy_Uppercase_Mapping => { replaced_by => 'uc', - file => 'To/Upper', - swash_name => 'ToUpper' - }, - Legacy_Perl_Decimal_Digit => { replaced_by => 'Perl_Decimal_Digit', - file => 'To/Digit', - swash_name => 'ToDigit' - }, - ); - -foreach my $legacy_prop (keys %legacy_props) { - @list = prop_invmap($legacy_prop); - is(@list, 0, "'$legacy_prop' is unknown to prop_invmap"); -} - # The files for these properties shouldn't have their formats changed in case # applications use them (though such use is deprecated). -my @legacy_file_format = (keys %legacy_props, - qw( Bidi_Mirroring_Glyph +my @legacy_file_format = (qw( Bidi_Mirroring_Glyph NFKC_Casefold ) ); @@ -1658,8 +1622,7 @@ my %tested_invmaps; # lists returned by prop_invlist(), which has already been tested. PROPERTY: -foreach my $prop (sort(keys %props), sort keys %legacy_props) { - my $is_legacy = 0; +foreach my $prop (sort(keys %props)) { my $loose_prop = &Unicode::UCD::loose_name(lc $prop); my $suppressed = grep { $_ eq $loose_prop } @Unicode::UCD::suppressed_properties; @@ -1673,39 +1636,12 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { if (! $name) { # Here, Perl doesn't know about this property. It could be a - # suppressed one, or a legacy one. - if (grep { $prop eq $_ } keys %legacy_props) { - - # For legacy properties, we look up the modern equivalent - # property instead; later massaging the results to look like the - # known format of the legacy property. 
We add info about the - # legacy property to the data structures for the rest of the - # properties; this is to avoid more special cases for the legacies - # in the code below - $full_name = $name = $prop; - $actual_lookup_prop = $legacy_props{$prop}->{'replaced_by'}; - my $base_file = $legacy_props{$prop}->{'file'}; - - # This legacy property is otherwise unknown to Perl; so shouldn't - # have any information about it already. - ok(! exists $Unicode::UCD::loose_property_to_file_of{$loose_prop}, - "There isn't a hash entry for file lookup of $prop"); - $Unicode::UCD::loose_property_to_file_of{$loose_prop} = $base_file; - - ok(! exists $Unicode::UCD::file_to_swash_name{$loose_prop}, - "There isn't a hash entry for swash lookup of $prop"); - $Unicode::UCD::file_to_swash_name{$base_file} - = $legacy_props{$prop}->{'swash_name'}; - $display_prop = $prop; - $is_legacy = 1; - } - else { + # suppressed one. if (! $suppressed) { fail("prop_invmap('$prop')"); diag("is unknown to prop_aliases(), and we need it in order to test prop_invmap"); } next PROPERTY; - } } # Normalize the short name, as it is stored in the hashes under the @@ -1728,49 +1664,6 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { my ($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($actual_lookup_prop); my $return_ref = [ $invlist_ref, $invmap_ref, $format, $missing ]; - - # The legacy property files all are expanded out so that each range is 1 - # element long. That isn't true of the modern equivalent we use to check - # those files for correctness against. So take the output of the proxy - # and expand it to match the legacy file. - if ($is_legacy) { - my @expanded_list; - my @expanded_map; - for my $i (0 .. @$invlist_ref - 1 - 1) { - if (ref $invmap_ref->[$i] || $invmap_ref->[$i] eq $missing) { - - # No adjustments should be done for the default mapping and - # the multi-char ones.
- push @expanded_list, $invlist_ref->[$i]; - push @expanded_map, $invmap_ref->[$i]; - } - else { - - # Expand the range into separate elements for each item. - my $offset = 0; - for my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] -1) { - push @expanded_list, $j; - push @expanded_map, $invmap_ref->[$i] + $offset; - - # The 'ae' format is for Legacy_Perl_Decimal_Digit; the - # other 4 are kept with leading zeros in the file, so - # convert to that. - $expanded_map[-1] = sprintf("%04X", $expanded_map[-1]) - if $format ne 'ae'; - $offset++; - } - } - } - - # Final element is taken as is. The map should always be to the - # default value, so don't do a sprintf like we did above. - push @expanded_list, $invlist_ref->[-1]; - push @expanded_map, $invmap_ref->[-1]; - - $invlist_ref = \@expanded_list; - $invmap_ref = \@expanded_map; - } - # If have already tested this property under a different name, merely # compare the return from now with the saved one from before. if (exists $tested_invmaps{$name}) { @@ -1861,11 +1754,6 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { next PROPERTY; } } - elsif ($missing ne "0" && ! grep { $prop eq $_ } keys %legacy_props) { - fail("prop_invmap('$display_prop')"); - diag("The missings should be '0'; got '$missing'"); - next PROPERTY; - } } elsif ($missing =~ /[<>]/) { fail("prop_invmap('$display_prop')"); @@ -2060,21 +1948,18 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { ? "%04X" : $file_range_format; - # Certain of the proxy properties have to be adjusted to match the - # real ones. - if ($full_name - =~ /^(Legacy_)?(Case_Folding|(Lower|Title|Upper)case_Mapping)/) + # Combination properties, where the same file contains mappings to both + # the simple and full versions, have to be adjusted when looking at + # the full versions. 
+ if ($full_name =~ /^ ( Case_Folding + | (Lower|Title|Upper) case_Mapping ) + $ /x) { - - # Here we have either - # 1) Case_Folding; or - # 2) a proxy that is a full mapping, which means that what the - # real property is is the equivalent simple mapping. - # In both cases, the file will have a standard list containing - # simple mappings (to a single code point), and a specials hash - # which contains all the mappings that are to multiple code - # points. First, extract a list containing all the file's simple - # mappings. + # The file will have a standard list containing simple mappings + # (to a single code point), and a specials hash which contains all + # the mappings that are to multiple code points. + # + # First, extract a list containing all the file's simple mappings. my @list; for (split "\n", $official) { my ($start, $end, $value) = / ^ (.+?) \t (.*?) \t (.+?) diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 3c1da94b53f0..5732c616dfa1 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -6717,23 +6717,13 @@ sub trace { return main::trace(@_); } main::set_access('anomalous_entries', # Append singular, read plural \%anomalous_entries, 'readable_array'); - - my %replacement_property; - # Certain files are unused by Perl itself, and are kept only for backwards - # compatibility for programs that used them before Unicode::UCD existed. - # These are termed legacy properties. At some point they may be removed, - # but for now mark them as legacy. If non empty, this is the name of the - # property to use instead (i.e., the modern equivalent). - main::set_access('replacement_property', \%replacement_property, 'r'); - my %to_output_map; # Enum as to whether or not to write out this map table, and how: # 0 don't output # $EXTERNAL_MAP means its existence is noted in the documentation, and # it should not be removed nor its format changed. This # is done for those files that have traditionally been - # output. 
Maps of legacy-only properties default to - # this. + # output. # $INTERNAL_MAP means Perl reserves the right to do anything it wants # with this file # $OUTPUT_ADJUSTED means that it is an $INTERNAL_MAP, and instead of @@ -6758,17 +6748,9 @@ sub trace { return main::trace(@_); } my $default_map = delete $args{'Default_Map'}; my $property = delete $args{'_Property'}; my $full_name = delete $args{'Full_Name'}; - my $replacement_property = delete $args{'Replacement_Property'} // ""; my $to_output_map = delete $args{'To_Output_Map'}; - # Rest of parameters passed on; legacy properties have several common - # other attributes - if ($replacement_property) { - $args{"Fate"} = $LEGACY_ONLY; - $args{"Range_Size_1"} = 1; - $args{"Perl_Extension"} = 1; - $args{"UCD"} = 0; - } + # Rest of parameters passed on my $range_list = Range_Map->new(Owner => $property); @@ -6785,9 +6767,6 @@ sub trace { return main::trace(@_); } $anomalous_entries{$addr} = []; $default_map{$addr} = $default_map; - $replacement_property{$addr} = $replacement_property; - $to_output_map = $EXTERNAL_MAP if ! defined $to_output_map - && $replacement_property; $to_output_map{$addr} = $to_output_map; $self->initialize($initialize) if defined $initialize; @@ -6993,12 +6972,8 @@ sub trace { return main::trace(@_); } $return .= $INTERNAL_ONLY_HEADER; } else { - my $property_name = $self->property->replacement_property; - - # The legacy-only properties were gotten above; but there are some - # other properties whose files are in current use that have fixed - # formats. - $property_name = $self->property->full_name unless $property_name; + # Other properties have fixed formats. 
+ my $property_name = $self->property->full_name; $return .= <fate != $ORDINARY && $map{$addr}->fate != $LEGACY_ONLY @@ -9379,7 +9352,6 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace } initialize inverse_list is_empty - replacement_property name note perl_extension @@ -12274,26 +12246,6 @@ sub filter_old_style_arabic_shaping { Carp::my_carp_bug("Need to process UnicodeData before SpecialCasing. Only special casing will be generated."); } - # Create a table in the old-style format and with the original - # file name for backwards compatibility with applications that - # read it directly. The new tables contain both the simple and - # full maps, and the old are missing simple maps when there is a - # conflicting full one. Probably it would have been ok to add - # those to the legacy version, as was already done in 5.14 to the - # case folding one, but this was not done, out of an abundance of - # caution. The tables are set up here before we deal with the - # full maps so that as we handle those, we can override the simple - # maps for them in the legacy table, and merely add them in the - # new-style one. - my $legacy = Property->new("Legacy_" . $full_casing_full_name, - File => $full_casing_full_name - =~ s/case_Mapping//r, - Format => $HEX_FORMAT, - Default_Map => $CODE_POINT, - Initialize => $full_casing_table, - Replacement_Property => $full_casing_full_name, - ); - $full_casing_table->add_comment(join_lines( <insert_adjusted_lines("$fields[0]; Legacy_" - . $object->full_name - . "; $fields[$i]"); - - # ... and the regular table, in which it is additional, - # beyond the simple mapping. + # The mapping is additional, beyond the simple mapping. $file->insert_adjusted_lines("$fields[0]; " . $object->name . 
"; " @@ -13705,25 +13650,6 @@ END $gc->table('Ll')->set_caseless_equivalent($LC); $gc->table('Lu')->set_caseless_equivalent($LC); - # Create digit and case fold tables with the original file names for - # backwards compatibility with applications that read them directly. - my $Digit = Property->new("Legacy_Perl_Decimal_Digit", - Default_Map => "", - File => 'Digit', # Trad. location - Directory => $map_directory, - Type => $STRING, - Replacement_Property => "Perl_Decimal_Digit", - Initialize => property_ref('Perl_Decimal_Digit'), - ); - $Digit->add_comment(join_lines(<table('Decimal')->ranges) { @@ -13736,16 +13662,6 @@ END . " and will have to be fixed. Proceeding anyway."); } - Property->new('Legacy_Case_Folding', - File => "Fold", - Directory => $map_directory, - Default_Map => $CODE_POINT, - Type => $STRING, - Replacement_Property => "Case_Folding", - Format => $HEX_FORMAT, - Initialize => property_ref('cf'), - ); - # Mark the scx table as the parent of the corresponding sc table for those # which are identical. This causes the pod for the script table to refer # to the corresponding scx one. 
This is done after everything, so as to diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index 4e6246445fdc..70d486c7b266 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1295,7 +1295,7 @@ # baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt # 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt # 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt -# e1bf3b84f01cb4fec63c65f71c1a1ea1af644d7c142810d17497ff8a23b781c7 lib/unicore/mktables +# 497b589915a64625b274215e8b8ca02b7051ecbd4dd85a488807e76a41bc707d lib/unicore/mktables # 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version # 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl # 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl diff --git a/regcharclass.h b/regcharclass.h index f967d54c5acd..27b5845b337f 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -3762,7 +3762,7 @@ * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * e1bf3b84f01cb4fec63c65f71c1a1ea1af644d7c142810d17497ff8a23b781c7 lib/unicore/mktables + * 497b589915a64625b274215e8b8ca02b7051ecbd4dd85a488807e76a41bc707d lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl * 1aa94679c695efd507b7e4491629dba1021b74c21a5324dfd3a582a5d654bd32 regen/regcharclass.pl diff --git a/uni_keywords.h b/uni_keywords.h index 6f09587f07b9..97bb7b3f7210 
100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7542,7 +7542,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * e1bf3b84f01cb4fec63c65f71c1a1ea1af644d7c142810d17497ff8a23b781c7 lib/unicore/mktables + * 497b589915a64625b274215e8b8ca02b7051ecbd4dd85a488807e76a41bc707d lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl * 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl