Skip to content

Commit

Permalink
fix GitHub #122: "x64" being parsed as x operator plus number
Browse files Browse the repository at this point in the history
fixes #122
fixes #123
  • Loading branch information
moregan authored and wchristian committed May 12, 2017
1 parent 4dfbc99 commit 6988c84
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 19 deletions.
37 changes: 24 additions & 13 deletions lib/PPI/Token/Whitespace.pm
Original file line number Diff line number Diff line change
Expand Up @@ -390,19 +390,30 @@ sub __TOKENIZER__on_char {
return 'Operator';

} elsif ( $char == 120 ) { # $char eq 'x'
# x followed immediately by a digit can be the x
# operator or a word. Disambiguate by checking
# whether the previous token is an operator that cannot be
# followed by the x operator, e.g.: +.
#
# x followed immediately by '=' is the 'x=' operator, not
# 'x ='. An important exception is x followed immediately by
# '=>', which makes the x into a bareword.
pos $t->{line} = $t->{line_cursor} + 1;
return 'Operator'
if $t->_current_x_is_operator and $t->{line} =~ m/\G(?:\d|(?!(=>|[\w\s])))/gc;

# Otherwise, commit like a normal bareword
# Could be a word, the x= operator, the x operator
# followed by whitespace, or the x operator without any
# space between itself and its operand, e.g.: '$a x3',
# which is the same as '$a x 3'. _current_x_is_operator
# assumes we have a complete 'x' token, but we don't
# yet. We may need to split this x character apart from
# what follows it.
if ( $t->_current_x_is_operator ) {
pos $t->{line} = $t->{line_cursor} + 1;
return 'Operator' if $t->{line} =~ m/\G(?:
\d # x op with no whitespace e.g. 'x3'
|
(?!( # negative lookahead
=> # not on left of fat comma
|
\w # not a word like "xyzzy"
|
\s # not x op plus whitespace
))
)/gcx;
}

# Otherwise, commit like a normal bareword, including x
# operator followed by whitespace.
return PPI::Token::Word->__TOKENIZER__commit($t);

} elsif ( $char == 45 ) { # $char eq '-'
Expand Down
44 changes: 38 additions & 6 deletions lib/PPI/Tokenizer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,35 @@ my %X_CAN_FOLLOW_OPERATOR = map { $_ => 1 } qw( -- ++ );
# These are the exceptions.
# Lookup set (every value is 1) keyed by the closing brackets '}', ']'
# and ')'. The ')' is written as '\)' because the qw list itself is
# delimited by parentheses, so an unescaped ')' would end the list early.
# _current_x_is_operator consults this table when the previous significant
# token is a PPI::Token::Structure.
my %X_CAN_FOLLOW_STRUCTURE = map { $_ => 1 } qw( } ] \) );


# Something that looks like the x operator but follows a word
# is usually that word's argument.
# These are the exceptions: a lookup set (every value is 1) of words
# after which an 'x' can still be the x operator. The entries are Perl
# built-ins that perlfunc documents as taking no arguments, plus __SUB__.
# chop, chomp and dump are not listed because they are ambiguous: each
# can be called either with parameters or with none.
# NOTE(review): the code that reads this table is outside the visible
# hunk -- presumably __current_token_is_forced_word; confirm.
my %X_CAN_FOLLOW_WORD = map { $_ => 1 } qw(
endgrent
endhostent
endnetent
endprotoent
endpwent
endservent
fork
getgrent
gethostent
getlogin
getnetent
getppid
getprotoent
getpwent
getservent
setgrent
setpwent
time
times
wait
wantarray
__SUB__
);



Expand Down Expand Up @@ -758,13 +786,17 @@ sub _opcontext {
# Assuming we are currently parsing the word 'x', return true
# if previous tokens imply the x is an operator, false otherwise.
#
# Returns a bare (empty/undef) value when no tokens have been produced
# yet or when there is no previous significant token.
#
# NOTE(review): this listing is a rendered diff with the +/- markers
# stripped, so removed and added lines are interleaved and the text is
# not valid Perl as shown ($self and $prev are each declared twice).
# Lines tagged "pre-commit" below appear to be the removed side of the
# hunk -- confirm against the repository before reusing this code.
sub _current_x_is_operator {
my $self = shift; # pre-commit form (apparently removed)
my ( $self ) = @_;
# Nothing tokenized so far: no context to decide from.
return if !@{$self->{tokens}};

# Ask for the last two significant tokens; only $prev is used in the
# code visible here ($prevprev is fetched but never read).
my ($prev, $prevprev) = @{ $self->_previous_significant_tokens(2) };
return if !$prev;

# pre-commit form (apparently removed): the next four lines are the old
# single-token lookup and the head of the old return expression, which
# continued into the '&& ... Structure' / '&& ... Label' lines below.
my $prev = $self->_last_significant_token;
return
$prev
&& (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev})
# After a word, x is an operator exactly when the current token is not
# being forced to parse as a word.
return !$self->__current_token_is_forced_word if $prev->isa('PPI::Token::Word');

# Otherwise x is an operator unless the previous token is an operator or
# structure that is absent from its exception table, or is a label.
# NOTE(review): $prev is used directly as the hash key, which presumably
# relies on the token object stringifying to its content -- confirm.
return (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev})
&& (!$prev->isa('PPI::Token::Structure') || $X_CAN_FOLLOW_STRUCTURE{$prev})
&& !$prev->isa('PPI::Token::Label')
;
}

Expand Down

0 comments on commit 6988c84

Please sign in to comment.