From 6e31710c045dafbbf99c5d435220130672a9b156 Mon Sep 17 00:00:00 2001
From: Mike O'Regan <moregan@stresscafe.com>
Date: Sun, 16 Nov 2014 19:06:58 -0600
Subject: [PATCH] fix GitHub #122: "x64" being parsed as x operator plus number

fixes #122
fixes #123
---
 lib/PPI/Token/Whitespace.pm | 37 ++++++++++++++++++++-----------
 lib/PPI/Tokenizer.pm        | 44 ++++++++++++++++++++++++++++++++-----
 t/ppi_statement_package.t   |  9 ++------
 t/ppi_statement_sub.t       |  3 ---
 t/ppi_token_operator.t      |  3 ---
 5 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/lib/PPI/Token/Whitespace.pm b/lib/PPI/Token/Whitespace.pm
index 1ab2880c..1a5fdd01 100644
--- a/lib/PPI/Token/Whitespace.pm
+++ b/lib/PPI/Token/Whitespace.pm
@@ -390,19 +390,30 @@ sub __TOKENIZER__on_char {
 		return 'Operator';
 
 	} elsif ( $char == 120 ) { # $char eq 'x'
-		# x followed immediately by a digit can be the x
-		# operator or a word.  Disambiguate by checking
-		# whether the previous token is an operator that cannot be
-		# followed by the x operator, e.g.: +.
-		#
-		# x followed immediately by '=' is the 'x=' operator, not
-		# 'x ='. An important exception is x followed immediately by
-		# '=>', which makes the x into a bareword.
-		pos $t->{line} = $t->{line_cursor} + 1;
-		return 'Operator'
-			if $t->_current_x_is_operator and $t->{line} =~ m/\G(?:\d|(?!(=>|[\w\s])))/gc;
-
-		# Otherwise, commit like a normal bareword
+		# Could be a word, the x= operator, the x operator
+		# followed by whitespace, or the x operator without any
+		# space between itself and its operand, e.g.: '$a x3',
+		# which is the same as '$a x 3'.  _current_x_is_operator
+		# assumes we have a complete 'x' token, but we don't
+		# yet.  We may need to split this x character apart from
+		# what follows it.
+		if ( $t->_current_x_is_operator ) {
+			pos $t->{line} = $t->{line_cursor} + 1;
+			return 'Operator' if $t->{line} =~ m/\G(?:
+				\d  # x op with no whitespace e.g. 'x3'
+				|
+				(?!(  # negative lookahead
+					=>  # not on left of fat comma
+					|
+					\w  # not a word like "xyzzy"
+					|
+					\s  # not x op plus whitespace
+				))
+			)/gcx;
+		}
+
+		# Otherwise, commit like a normal bareword.  This path also
+		# handles the x operator when it is followed by whitespace.
 		return PPI::Token::Word->__TOKENIZER__commit($t);
 
 	} elsif ( $char == 45 ) { # $char eq '-'
diff --git a/lib/PPI/Tokenizer.pm b/lib/PPI/Tokenizer.pm
index 75ad5f11..61ba9c52 100644
--- a/lib/PPI/Tokenizer.pm
+++ b/lib/PPI/Tokenizer.pm
@@ -102,7 +102,35 @@ my %X_CAN_FOLLOW_OPERATOR = map { $_ => 1 } qw( -- ++ );
 # These are the exceptions.
 my %X_CAN_FOLLOW_STRUCTURE = map { $_ => 1 } qw( } ] \) );
 
-
+# Something that looks like the x operator but follows a word
+# is usually that word's argument.
+# These are the exceptions.
+# chop, chomp, and dump are not listed: they are ambiguous because
+# they can be called either with or without parameters.
+my %X_CAN_FOLLOW_WORD = map { $_ => 1 } qw(
+		endgrent
+		endhostent
+		endnetent
+		endprotoent
+		endpwent
+		endservent
+		fork
+		getgrent
+		gethostent
+		getlogin
+		getnetent
+		getppid
+		getprotoent
+		getpwent
+		getservent
+		setgrent
+		setpwent
+		time
+		times
+		wait
+		wantarray
+		__SUB__
+);
 
 
 
@@ -758,13 +786,17 @@ sub _opcontext {
 # Assuming we are currently parsing the word 'x', return true
 # if previous tokens imply the x is an operator, false otherwise.
 sub _current_x_is_operator {
-	my $self = shift;
+	my ( $self ) = @_;
+	return if !@{$self->{tokens}};
+
+	my ($prev, $prevprev) = @{ $self->_previous_significant_tokens(2) };
+	return if !$prev;
 
-	my $prev = $self->_last_significant_token;
-	return 
-		$prev
-		&& (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev})
+	return !$self->__current_token_is_forced_word if $prev->isa('PPI::Token::Word');
+	
+	return (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev})
 		&& (!$prev->isa('PPI::Token::Structure') || $X_CAN_FOLLOW_STRUCTURE{$prev})
+		&& !$prev->isa('PPI::Token::Label')
 	;
 }
 
diff --git a/t/ppi_statement_package.t b/t/ppi_statement_package.t
index 84b12593..53608fc9 100644
--- a/t/ppi_statement_package.t
+++ b/t/ppi_statement_package.t
@@ -63,10 +63,6 @@ END_PERL
 	is( $packages->[3]->version, '0.09', 'Package 4 returns correct version' );
 }
 
-my %known_bad = map { ( "package $_" => 1 ) }
-  'x64 0.50 ;', 'x64 0.50 { 1 }', 'x64 0.50;', 'x64 0.50{ 1 }', 'x64 ;', 'x64 v1.2.3 ;', 'x64 v1.2.3 { 1 }', 'x64 v1.2.3;', 'x64 v1.2.3{ 1 }', 'x64 { 1 }',
-  ;
-
 PERL_5_12_SYNTAX: {
 	my @names = (
 		# normal name
@@ -135,8 +131,6 @@ sub prepare_package_test {
 sub test_package_blocks {
 	my ( $code, $expected_package_tokens ) = @_;
 
-TODO: {
-	local $TODO = $known_bad{$code} ? "known bug" : undef;
 	subtest "'$code'", sub {
 
 	my $Document = PPI::Document->new( \"$code 999;" );
@@ -151,7 +145,8 @@ TODO: {
 	isa_ok( $Document->schild(1), 'PPI::Statement', "code prior statement end recognized" );
 	isa_ok( eval { $Document->schild(1)->schild(0) }, 'PPI::Token::Number', "inner code" );
 	is(     eval { $Document->schild(1)->schild(0) }, '999', "number correct"  );
+
 	};
-}
+
 	return;
 }
diff --git a/t/ppi_statement_sub.t b/t/ppi_statement_sub.t
index cfccc9be..04842ad2 100644
--- a/t/ppi_statement_sub.t
+++ b/t/ppi_statement_sub.t
@@ -27,8 +27,6 @@ NAME: {
 		my $code = $test->{code};
 		my $name = $test->{name};
 
-TODO:   {
-		local $TODO = $code eq 'sub x64 {}' ? "known bug" : undef;
 		subtest "'$code'", => sub {
 
 		my $Document = PPI::Document->new( \$code );
@@ -41,7 +39,6 @@ TODO:   {
 		is( eval { $sub_statement->name }, $name, "name() correct" );
 
 		};
-}
 
 	}
 }
diff --git a/t/ppi_token_operator.t b/t/ppi_token_operator.t
index 695aec7c..2f9d5350 100644
--- a/t/ppi_token_operator.t
+++ b/t/ppi_token_operator.t
@@ -587,15 +587,12 @@ OPERATOR_X: {
 		if ( $expected->[0] !~ /^PPI::Statement/ ) {
 			unshift @$expected, 'PPI::Statement', $test->{code};
 		}
-TODO: {
-		local $TODO = $test->{code} eq "LABEL: x64" ? "known bug" : undef;
 		my $ok = is_deeply( $tokens, $expected, $test->{desc} );
 		if ( !$ok ) {
 			diag "$test->{code} ($test->{desc})\n";
 			diag explain $tokens;
 			diag explain $test->{expected};
 		}
-}
 	}
 }