From 6e31710c045dafbbf99c5d435220130672a9b156 Mon Sep 17 00:00:00 2001 From: Mike O'Regan Date: Sun, 16 Nov 2014 19:06:58 -0600 Subject: [PATCH] fix GitHub #122: "x64" being parsed as x operator plus number fixes #122 fixes #123 --- lib/PPI/Token/Whitespace.pm | 37 ++++++++++++++++++++----------- lib/PPI/Tokenizer.pm | 44 ++++++++++++++++++++++++++++++++----- t/ppi_statement_package.t | 9 ++------ t/ppi_statement_sub.t | 3 --- t/ppi_token_operator.t | 3 --- 5 files changed, 64 insertions(+), 32 deletions(-) diff --git a/lib/PPI/Token/Whitespace.pm b/lib/PPI/Token/Whitespace.pm index 1ab2880c..1a5fdd01 100644 --- a/lib/PPI/Token/Whitespace.pm +++ b/lib/PPI/Token/Whitespace.pm @@ -390,19 +390,30 @@ sub __TOKENIZER__on_char { return 'Operator'; } elsif ( $char == 120 ) { # $char eq 'x' - # x followed immediately by a digit can be the x - # operator or a word. Disambiguate by checking - # whether the previous token is an operator that cannot be - # followed by the x operator, e.g.: +. - # - # x followed immediately by '=' is the 'x=' operator, not - # 'x ='. An important exception is x followed immediately by - # '=>', which makes the x into a bareword. - pos $t->{line} = $t->{line_cursor} + 1; - return 'Operator' - if $t->_current_x_is_operator and $t->{line} =~ m/\G(?:\d|(?!(=>|[\w\s])))/gc; - - # Otherwise, commit like a normal bareword + # Could be a word, the x= operator, the x operator + # followed by whitespace, or the x operator without any + # space between itself and its operand, e.g.: '$a x3', + # which is the same as '$a x 3'. _current_x_is_operator + # assumes we have a complete 'x' token, but we don't + # yet. We may need to split this x character apart from + # what follows it. + if ( $t->_current_x_is_operator ) { + pos $t->{line} = $t->{line_cursor} + 1; + return 'Operator' if $t->{line} =~ m/\G(?: + \d # x op with no whitespace e.g. 'x3' + | + (?!( # negative lookahead + => # not on left of fat comma + | + \w # not a word like "xyzzy" + | + \s # not x op plus whitespace + )) + )/gcx; + } + + # Otherwise, commit like a normal bareword, including x + # operator followed by whitespace. return PPI::Token::Word->__TOKENIZER__commit($t); } elsif ( $char == 45 ) { # $char eq '-' diff --git a/lib/PPI/Tokenizer.pm b/lib/PPI/Tokenizer.pm index 75ad5f11..61ba9c52 100644 --- a/lib/PPI/Tokenizer.pm +++ b/lib/PPI/Tokenizer.pm @@ -102,7 +102,35 @@ my %X_CAN_FOLLOW_OPERATOR = map { $_ => 1 } qw( -- ++ ); # These are the exceptions. my %X_CAN_FOLLOW_STRUCTURE = map { $_ => 1 } qw( } ] \) ); - +# Something that looks like the x operator but follows a word +# is usually that word's argument. +# These are the exceptions. +# chop, chomp, dump are ambiguous because they can have either parms +# or no parms. +my %X_CAN_FOLLOW_WORD = map { $_ => 1 } qw( + endgrent + endhostent + endnetent + endprotoent + endpwent + endservent + fork + getgrent + gethostent + getlogin + getnetent + getppid + getprotoent + getpwent + getservent + setgrent + setpwent + time + times + wait + wantarray + __SUB__ +); @@ -758,13 +786,17 @@ sub _opcontext { # Assuming we are currently parsing the word 'x', return true # if previous tokens imply the x is an operator, false otherwise. sub _current_x_is_operator { - my $self = shift; + my ( $self ) = @_; + return if !@{$self->{tokens}}; + + my ($prev, $prevprev) = @{ $self->_previous_significant_tokens(2) }; + return if !$prev; - my $prev = $self->_last_significant_token; - return - $prev - && (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev}) + return !$self->__current_token_is_forced_word if $prev->isa('PPI::Token::Word'); + + return (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev}) && (!$prev->isa('PPI::Token::Structure') || $X_CAN_FOLLOW_STRUCTURE{$prev}) + && !$prev->isa('PPI::Token::Label') ; } diff --git a/t/ppi_statement_package.t b/t/ppi_statement_package.t index 84b12593..53608fc9 100644 --- a/t/ppi_statement_package.t +++ b/t/ppi_statement_package.t @@ -63,10 +63,6 @@ END_PERL is( $packages->[3]->version, '0.09', 'Package 4 returns correct version' ); } -my %known_bad = map { ( "package $_" => 1 ) } - 'x64 0.50 ;', 'x64 0.50 { 1 }', 'x64 0.50;', 'x64 0.50{ 1 }', 'x64 ;', 'x64 v1.2.3 ;', 'x64 v1.2.3 { 1 }', 'x64 v1.2.3;', 'x64 v1.2.3{ 1 }', 'x64 { 1 }', - ; - PERL_5_12_SYNTAX: { my @names = ( # normal name @@ -135,8 +131,6 @@ sub prepare_package_test { sub test_package_blocks { my ( $code, $expected_package_tokens ) = @_; -TODO: { - local $TODO = $known_bad{$code} ? "known bug" : undef; subtest "'$code'", sub { my $Document = PPI::Document->new( \"$code 999;" ); @@ -151,7 +145,8 @@ TODO: { isa_ok( $Document->schild(1), 'PPI::Statement', "code prior statement end recognized" ); isa_ok( eval { $Document->schild(1)->schild(0) }, 'PPI::Token::Number', "inner code" ); is( eval { $Document->schild(1)->schild(0) }, '999', "number correct" ); + }; -} + return; } diff --git a/t/ppi_statement_sub.t b/t/ppi_statement_sub.t index cfccc9be..04842ad2 100644 --- a/t/ppi_statement_sub.t +++ b/t/ppi_statement_sub.t @@ -27,8 +27,6 @@ NAME: { my $code = $test->{code}; my $name = $test->{name}; -TODO: { - local $TODO = $code eq 'sub x64 {}' ? "known bug" : undef; subtest "'$code'", => sub { my $Document = PPI::Document->new( \$code ); @@ -41,7 +39,6 @@ TODO: { is( eval { $sub_statement->name }, $name, "name() correct" ); }; -} } } diff --git a/t/ppi_token_operator.t b/t/ppi_token_operator.t index 695aec7c..2f9d5350 100644 --- a/t/ppi_token_operator.t +++ b/t/ppi_token_operator.t @@ -587,15 +587,12 @@ OPERATOR_X: { if ( $expected->[0] !~ /^PPI::Statement/ ) { unshift @$expected, 'PPI::Statement', $test->{code}; } -TODO: { - local $TODO = $test->{code} eq "LABEL: x64" ? "known bug" : undef; my $ok = is_deeply( $tokens, $expected, $test->{desc} ); if ( !$ok ) { diag "$test->{code} ($test->{desc})\n"; diag explain $tokens; diag explain $test->{expected}; } -} } }