fix GitHub #122: "x64" being parsed as x operator plus number

Fixes #122. Fixes #123.
Perl-Critic · May 12, 2017 · 6988c84
1 parent 4dfbc99
commit 6988c84
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 19 deletions.
diff --git a/lib/PPI/Token/Whitespace.pm b/lib/PPI/Token/Whitespace.pm
@@ -390,19 +390,30 @@ sub __TOKENIZER__on_char {
 		return 'Operator';
 
 	} elsif ( $char == 120 ) { # $char eq 'x'
-		# x followed immediately by a digit can be the x
-		# operator or a word.  Disambiguate by checking
-		# whether the previous token is an operator that cannot be
-		# followed by the x operator, e.g.: +.
-		#
-		# x followed immediately by '=' is the 'x=' operator, not
-		# 'x ='. An important exception is x followed immediately by
-		# '=>', which makes the x into a bareword.
-		pos $t->{line} = $t->{line_cursor} + 1;
-		return 'Operator'
-			if $t->_current_x_is_operator and $t->{line} =~ m/\G(?:\d|(?!(=>|[\w\s])))/gc;
-
-		# Otherwise, commit like a normal bareword
+		# Could be a word, the x= operator, the x operator
+		# followed by whitespace, or the x operator without any
+		# space between itself and its operand, e.g.: '$a x3',
+		# which is the same as '$a x 3'.  _current_x_is_operator
+		# assumes we have a complete 'x' token, but we don't
+		# yet.  We may need to split this x character apart from
+		# what follows it.
+		if ( $t->_current_x_is_operator ) {
+			pos $t->{line} = $t->{line_cursor} + 1;
+			return 'Operator' if $t->{line} =~ m/\G(?:
+				\d  # x op with no whitespace e.g. 'x3'
+				|
+				(?!(  # negative lookahead
+					=>  # not on left of fat comma
+					|
+					\w  # not a word like "xyzzy"
+					|
+					\s  # not x op plus whitespace
+				))
+			)/gcx;
+		}
+
+		# Otherwise, commit like a normal bareword, including x
+		# operator followed by whitespace.
 		return PPI::Token::Word->__TOKENIZER__commit($t);
 
 	} elsif ( $char == 45 ) { # $char eq '-'

diff --git a/lib/PPI/Tokenizer.pm b/lib/PPI/Tokenizer.pm
@@ -102,7 +102,35 @@ my %X_CAN_FOLLOW_OPERATOR = map { $_ => 1 } qw( -- ++ );
 # These are the exceptions.
 my %X_CAN_FOLLOW_STRUCTURE = map { $_ => 1 } qw( } ] \) );
 
-
+# Something that looks like the x operator but follows a word
+# is usually that word's argument.
+# These are the exceptions: after these words an x can be the operator.
+# chop, chomp and dump are not listed because they are ambiguous:
+# they can be called either with or without parameters.
+my %X_CAN_FOLLOW_WORD = map { $_ => 1 } qw(
+		endgrent
+		endhostent
+		endnetent
+		endprotoent
+		endpwent
+		endservent
+		fork
+		getgrent
+		gethostent
+		getlogin
+		getnetent
+		getppid
+		getprotoent
+		getpwent
+		getservent
+		setgrent
+		setpwent
+		time
+		times
+		wait
+		wantarray
+		__SUB__
+);
 
 
 
@@ -758,13 +786,17 @@ sub _opcontext {
 # Assuming we are currently parsing the word 'x', return true
 # if previous tokens imply the x is an operator, false otherwise.
 sub _current_x_is_operator {
-	my $self = shift;
+	my ( $self ) = @_;
+	return if !@{$self->{tokens}};
+
+	my ($prev, $prevprev) = @{ $self->_previous_significant_tokens(2) };
+	return if !$prev;
 
-	my $prev = $self->_last_significant_token;
-	return 
-		$prev
-		&& (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev})
+	return !$self->__current_token_is_forced_word if $prev->isa('PPI::Token::Word');
+
+	return (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev})
 		&& (!$prev->isa('PPI::Token::Structure') || $X_CAN_FOLLOW_STRUCTURE{$prev})
+		&& !$prev->isa('PPI::Token::Label')
 	;
 }