diff --git a/cloc b/cloc index a689ea97..73e0d1e8 100755 --- a/cloc +++ b/cloc @@ -6199,27 +6199,31 @@ sub rm_comments_in_strings { # {{{1 foreach my $line (@{$ra_lines}) { #print "line=[$line]\n"; my $new_line = ""; + if ($line !~ /${string_marker}/) { # short circuit; no strings on this line - if ( $in_ml_string ) { - $line =~ s/\Q${start_comment}\E/xx/g; + if ( $in_ml_string ) { + $line =~ s/\Q${start_comment}\E/xx/g; $line =~ s/\Q${end_comment}\E/xx/g if $end_comment; - } + } push @save_lines, $line; next; } + # replace backslashed string markers with 'Q' $line =~ s/\\${string_marker}/Q/g; - if ( $in_ml_string and $line =~ /^(.*?)(${string_marker})(.*)$/ ) { - # A multiline string ends on this line. Process the part - # until the end of the multiline string first. + + if ( $in_ml_string and $line =~ /^(.*?)(${string_marker})(.*)$/ ) { + # A multiline string ends on this line. Process the part + # until the end of the multiline string first. my ($lastpart_ml_string, $firstpart_marker, $rest_of_line ) = ($1, $2, $3); - $lastpart_ml_string =~ s/\Q${start_comment}\E/xx/g; - $lastpart_ml_string =~ s/\Q${end_comment}\E/xx/g if $end_comment; - $new_line = $lastpart_ml_string . $firstpart_marker; - $line = $rest_of_line; - $in_ml_string = 0; - } + $lastpart_ml_string =~ s/\Q${start_comment}\E/xx/g; + $lastpart_ml_string =~ s/\Q${end_comment}\E/xx/g if $end_comment; + $new_line = $lastpart_ml_string . $firstpart_marker; + $line = $rest_of_line; + $in_ml_string = 0; + } + my @tokens = split(/(${string_marker}.*?${string_marker})/, $line); foreach my $t (@tokens) { #printf " t0 = [$t]\n"; @@ -6227,25 +6231,34 @@ sub rm_comments_in_strings { # {{{1 # enclosed in quotes; process this token $t =~ s/\Q${start_comment}\E/xx/g; $t =~ s/\Q${end_comment}\E/xx/g if $end_comment; - } - elsif ( $multiline_mode and $t =~ /(${string_marker})/ ) { - # Unclosed quote present in line. If multiline_mode is enabled, - # consider it the start of a multiline string. - my $firstpart_marker = $1; + } + elsif ( $multiline_mode and $t =~ /(${string_marker})/ ) { + # Unclosed quote present in line. If multiline_mode is enabled, + # consider it the start of a multiline string. + my $firstpart_marker = $1; my @sub_token = split(/${string_marker}/, $t ); - if ( scalar @sub_token == 2 ) { - $t = $sub_token[0] . $firstpart_marker; - $sub_token[1] =~ s/\Q${start_comment}\E/xx/g; - $sub_token[1] =~ s/\Q${end_comment}\E/xx/g if $end_comment; - $t .= $sub_token[1]; - $in_ml_string = 1; - } else { - print "Warning: rm_comments_in_string length \@sub_token > 2\n"; - } - } - #printf " t1 = [$t]\n"; - $new_line .= $t; + if ( scalar @sub_token == 1 ) { + # The line ends with a string marker that starts + # a multiline string. + $t = $sub_token[0] . $firstpart_marker; + $in_ml_string = 1; + } + elsif ( scalar @sub_token == 2 ) { + # The line has some more content after the string + # marker that starts a multiline string + $t = $sub_token[0] . $firstpart_marker; + $sub_token[1] =~ s/\Q${start_comment}\E/xx/g; + $sub_token[1] =~ s/\Q${end_comment}\E/xx/g if $end_comment; + $t .= $sub_token[1]; + $in_ml_string = 1; + } else { + print "Warning: rm_comments_in_string length \@sub_token > 2\n"; + } + + } + #printf " t1 = [$t]\n"; + $new_line .= $t; } push @save_lines, $new_line; }