diff --git a/lib/PPI/Token/Whitespace.pm b/lib/PPI/Token/Whitespace.pm index 1ab2880c..1a5fdd01 100644 --- a/lib/PPI/Token/Whitespace.pm +++ b/lib/PPI/Token/Whitespace.pm @@ -390,19 +390,30 @@ sub __TOKENIZER__on_char { return 'Operator'; } elsif ( $char == 120 ) { # $char eq 'x' - # x followed immediately by a digit can be the x - # operator or a word. Disambiguate by checking - # whether the previous token is an operator that cannot be - # followed by the x operator, e.g.: +. - # - # x followed immediately by '=' is the 'x=' operator, not - # 'x ='. An important exception is x followed immediately by - # '=>', which makes the x into a bareword. - pos $t->{line} = $t->{line_cursor} + 1; - return 'Operator' - if $t->_current_x_is_operator and $t->{line} =~ m/\G(?:\d|(?!(=>|[\w\s])))/gc; - - # Otherwise, commit like a normal bareword + # Could be a word, the x= operator, the x operator + # followed by whitespace, or the x operator without any + # space between itself and its operand, e.g.: '$a x3', + # which is the same as '$a x 3'. _current_x_is_operator + # assumes we have a complete 'x' token, but we don't + # yet. We may need to split this x character apart from + # what follows it. + if ( $t->_current_x_is_operator ) { + pos $t->{line} = $t->{line_cursor} + 1; + return 'Operator' if $t->{line} =~ m/\G(?: + \d # x op with no whitespace e.g. 'x3' + | + (?!( # negative lookahead + => # not on left of fat comma + | + \w # not a word like "xyzzy" + | + \s # not x op plus whitespace + )) + )/gcx; + } + + # Otherwise, commit like a normal bareword, including x + # operator followed by whitespace. return PPI::Token::Word->__TOKENIZER__commit($t); } elsif ( $char == 45 ) { # $char eq '-' diff --git a/lib/PPI/Tokenizer.pm b/lib/PPI/Tokenizer.pm index 59150b5b..c6c91c7e 100644 --- a/lib/PPI/Tokenizer.pm +++ b/lib/PPI/Tokenizer.pm @@ -102,7 +102,35 @@ my %X_CAN_FOLLOW_OPERATOR = map { $_ => 1 } qw( -- ++ ); # These are the exceptions. 
my %X_CAN_FOLLOW_STRUCTURE = map { $_ => 1 } qw( } ] \) ); - +# Something that looks like the x operator but follows a word +# is usually that word's argument. +# These are the exceptions: words after which x can still be the operator. +# chop, chomp and dump are left out: they are ambiguous because they can be called with +# or without parameters. +my %X_CAN_FOLLOW_WORD = map { $_ => 1 } qw( + endgrent + endhostent + endnetent + endprotoent + endpwent + endservent + fork + getgrent + gethostent + getlogin + getnetent + getppid + getprotoent + getpwent + getservent + setgrent + setpwent + time + times + wait + wantarray + __SUB__ +); @@ -758,13 +786,17 @@ sub _opcontext { # Assuming we are currently parsing the word 'x', return true # if previous tokens imply the x is an operator, false otherwise. sub _current_x_is_operator { - my $self = shift; + my ( $self ) = @_; + return if !@{$self->{tokens}}; # nothing tokenized yet, so x cannot be an operator + + my ($prev, $prevprev) = @{ $self->_previous_significant_tokens(2) }; # NOTE(review): $prevprev is never read in this sub + return if !$prev; - my $prev = $self->_last_significant_token; - return - $prev - && (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev}) + return !$self->__current_token_is_forced_word if $prev->isa('PPI::Token::Word'); # after a word, x is an operator unless the current token is forced to be a word + + return (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev}) && (!$prev->isa('PPI::Token::Structure') || $X_CAN_FOLLOW_STRUCTURE{$prev}) + && !$prev->isa('PPI::Token::Label') ; }