Skip to content

Commit

Permalink
fix GitHub #122: "x64" being parsed as x operator plus number
Browse files Browse the repository at this point in the history
fixes #122
fixes #123
  • Loading branch information
moregan authored and wchristian committed May 13, 2017
1 parent 9a910d7 commit f2882cc
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 32 deletions.
37 changes: 24 additions & 13 deletions lib/PPI/Token/Whitespace.pm
Original file line number Diff line number Diff line change
Expand Up @@ -390,19 +390,30 @@ sub __TOKENIZER__on_char {
return 'Operator';

} elsif ( $char == 120 ) { # $char eq 'x'
# x followed immediately by a digit can be the x
# operator or a word. Disambiguate by checking
# whether the previous token is an operator that cannot be
# followed by the x operator, e.g.: +.
#
# x followed immediately by '=' is the 'x=' operator, not
# 'x ='. An important exception is x followed immediately by
# '=>', which makes the x into a bareword.
pos $t->{line} = $t->{line_cursor} + 1;
return 'Operator'
if $t->_current_x_is_operator and $t->{line} =~ m/\G(?:\d|(?!(=>|[\w\s])))/gc;

# Otherwise, commit like a normal bareword
# Could be a word, the x= operator, the x operator
# followed by whitespace, or the x operator without any
# space between itself and its operand, e.g.: '$a x3',
# which is the same as '$a x 3'. _current_x_is_operator
# assumes we have a complete 'x' token, but we don't
# yet. We may need to split this x character apart from
# what follows it.
if ( $t->_current_x_is_operator ) {
pos $t->{line} = $t->{line_cursor} + 1;
return 'Operator' if $t->{line} =~ m/\G(?:
\d # x op with no whitespace e.g. 'x3'
|
(?!( # negative lookahead
=> # not on left of fat comma
|
\w # not a word like "xyzzy"
|
\s # not x op plus whitespace
))
)/gcx;
}

# Otherwise, commit like a normal bareword, including x
# operator followed by whitespace.
return PPI::Token::Word->__TOKENIZER__commit($t);

} elsif ( $char == 45 ) { # $char eq '-'
Expand Down
44 changes: 38 additions & 6 deletions lib/PPI/Tokenizer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,35 @@ my %X_CAN_FOLLOW_OPERATOR = map { $_ => 1 } qw( -- ++ );
# These are the exceptions.
my %X_CAN_FOLLOW_STRUCTURE = map { $_ => 1 } qw( } ] \) );


# When something that looks like the x operator comes directly after a
# word, it is usually an argument to that word and not an operator.
# The words registered below are the exceptions to that rule.
# chop, chomp and dump are not registered: they are ambiguous, because
# they can be written either with or without parameters.
my %X_CAN_FOLLOW_WORD;
$X_CAN_FOLLOW_WORD{$_} = 1 for qw(
    endgrent    endhostent  endnetent   endprotoent
    endpwent    endservent
    fork
    getgrent    gethostent  getlogin    getnetent
    getppid     getprotoent getpwent    getservent
    setgrent    setpwent
    time        times
    wait        wantarray
    __SUB__
);



Expand Down Expand Up @@ -758,13 +786,17 @@ sub _opcontext {
# Assuming we are currently parsing the word 'x', return true
# if previous tokens imply the x is an operator, false otherwise.
sub _current_x_is_operator {
my $self = shift;
my ( $self ) = @_;
return if !@{$self->{tokens}};

my ($prev, $prevprev) = @{ $self->_previous_significant_tokens(2) };
return if !$prev;

my $prev = $self->_last_significant_token;
return
$prev
&& (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev})
return !$self->__current_token_is_forced_word if $prev->isa('PPI::Token::Word');

return (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev})
&& (!$prev->isa('PPI::Token::Structure') || $X_CAN_FOLLOW_STRUCTURE{$prev})
&& !$prev->isa('PPI::Token::Label')
;
}

Expand Down
9 changes: 2 additions & 7 deletions t/ppi_statement_package.t
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ END_PERL
is( $packages->[3]->version, '0.09', 'Package 4 returns correct version' );
}

my %known_bad = map { ( "package $_" => 1 ) }
'x64 0.50 ;', 'x64 0.50 { 1 }', 'x64 0.50;', 'x64 0.50{ 1 }', 'x64 ;', 'x64 v1.2.3 ;', 'x64 v1.2.3 { 1 }', 'x64 v1.2.3;', 'x64 v1.2.3{ 1 }', 'x64 { 1 }',
;

PERL_5_12_SYNTAX: {
my @names = (
# normal name
Expand Down Expand Up @@ -135,8 +131,6 @@ sub prepare_package_test {
sub test_package_blocks {
my ( $code, $expected_package_tokens ) = @_;

TODO: {
local $TODO = $known_bad{$code} ? "known bug" : undef;
subtest "'$code'", sub {

my $Document = PPI::Document->new( \"$code 999;" );
Expand All @@ -151,7 +145,8 @@ TODO: {
isa_ok( $Document->schild(1), 'PPI::Statement', "code prior statement end recognized" );
isa_ok( eval { $Document->schild(1)->schild(0) }, 'PPI::Token::Number', "inner code" );
is( eval { $Document->schild(1)->schild(0) }, '999', "number correct" );

};
}

return;
}
3 changes: 0 additions & 3 deletions t/ppi_statement_sub.t
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ NAME: {
my $code = $test->{code};
my $name = $test->{name};

TODO: {
local $TODO = $code eq 'sub x64 {}' ? "known bug" : undef;
subtest "'$code'", => sub {

my $Document = PPI::Document->new( \$code );
Expand All @@ -41,7 +39,6 @@ TODO: {
is( eval { $sub_statement->name }, $name, "name() correct" );

};
}

}
}
Expand Down
3 changes: 0 additions & 3 deletions t/ppi_token_operator.t
Original file line number Diff line number Diff line change
Expand Up @@ -587,15 +587,12 @@ OPERATOR_X: {
if ( $expected->[0] !~ /^PPI::Statement/ ) {
unshift @$expected, 'PPI::Statement', $test->{code};
}
TODO: {
local $TODO = $test->{code} eq "LABEL: x64" ? "known bug" : undef;
my $ok = is_deeply( $tokens, $expected, $test->{desc} );
if ( !$ok ) {
diag "$test->{code} ($test->{desc})\n";
diag explain $tokens;
diag explain $test->{expected};
}
}
}
}

Expand Down

0 comments on commit f2882cc

Please sign in to comment.