Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import com.sonar.sslr.impl.Lexer;

/**
*/
*/
public class StringLiteralsChannel extends Channel<Lexer> {

private static final char EOF = (char) -1;
Expand All @@ -36,6 +36,7 @@ public class StringLiteralsChannel extends Channel<Lexer> {

private int index;
private char ch;
private boolean isRawString = false;

@Override
public boolean consume(CodeReader code, Lexer output) {
Expand All @@ -46,26 +47,31 @@ public boolean consume(CodeReader code, Lexer output) {
if ((ch != '\"')) {
return false;
}
if (!read(code)) {
return false;
if (isRawString) {
if (!readRawString(code)) {
return false;
}
} else {
if (!readString(code)) {
return false;
}
}

for (int i = 0; i < index; i++) {
sb.append((char) code.pop());
}
output.addToken(Token.builder()
.setLine(line)
.setColumn(column)
.setURI(output.getURI())
.setValueAndOriginalValue(sb.toString())
.setType(CxxTokenType.STRING)
.build());
sb.setLength(0);
.setLine(line)
.setColumn(column)
.setURI(output.getURI())
.setValueAndOriginalValue(sb.toString())
.setType(CxxTokenType.STRING)
.build());
sb.setLength(0);
return true;
}

private boolean read(CodeReader code) {
// TODO: proper reading raw strings.

private boolean readString(CodeReader code) {
index++;
while (code.charAt(index) != ch) {
if (code.charAt(index) == EOF) {
Expand All @@ -81,8 +87,42 @@ private boolean read(CodeReader code) {
return true;
}

/**
 * Scans a C++11 raw string literal of the form
 * {@code "delimiter( raw_character* )delimiter"}, starting with {@code index}
 * positioned on the opening double quote.
 *
 * <p>On success {@code index} is left just past the closing double quote. The
 * literal ends at the first {@code )} that is immediately followed by the
 * delimiter and a double quote; every other {@code )} or {@code "} is part of
 * the raw content. Each {@code )} is tested in place, so a false candidate
 * never causes a later {@code )} to be skipped (e.g. {@code R"(a))"} or
 * {@code R"x(a)b)x"}).
 *
 * <p>Only local scratch storage is used, so an aborted scan leaves no partial
 * text behind in the shared token buffer.
 *
 * @param code reader positioned at the start of the candidate literal
 * @return {@code true} if a complete raw string was found, {@code false} if
 *         the end of input was reached first
 */
private boolean readRawString(CodeReader code) {
  // Step over the opening quote and collect the (possibly empty) delimiter up to '('.
  index++;
  StringBuilder delimiterChars = new StringBuilder();
  for (char c = code.charAt(index); c != '('; c = code.charAt(index)) {
    if (c == EOF) {
      return false;
    }
    delimiterChars.append(c);
    index++;
  }
  String delimiter = delimiterChars.toString();
  index++; // step over '('

  // Walk the raw characters; only ")delimiter\"" terminates the literal.
  for (char c = code.charAt(index); c != EOF; c = code.charAt(index)) {
    if (c == ')' && isRawStringTerminator(code, index + 1, delimiter)) {
      // Skip ')', the delimiter and the closing quote.
      index += delimiter.length() + 2;
      return true;
    }
    index++;
  }
  return false;
}

/**
 * Tells whether the characters starting at {@code start} are exactly the
 * delimiter followed by a closing double quote.
 */
private boolean isRawStringTerminator(CodeReader code, int start, String delimiter) {
  for (int i = 0; i < delimiter.length(); i++) {
    if (code.charAt(start + i) != delimiter.charAt(i)) {
      return false;
    }
  }
  return code.charAt(start + delimiter.length()) == '"';
}

private void readStringPrefix(CodeReader code) {
ch = code.charAt(index);
isRawString = false;
if ((ch == 'u') || (ch == 'U') || ch == 'L') {
index++;
if (ch == 'u' && code.charAt(index) == '8') {
Expand All @@ -92,6 +132,7 @@ private void readStringPrefix(CodeReader code) {
}
if (ch == 'R') {
index++;
isRawString = true;
ch = code.charAt(index);
}
}
Expand Down
23 changes: 18 additions & 5 deletions cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -446,18 +446,31 @@ public void string_literals() {

/**
 * Lexes a series of C++11 raw string literals and asserts, via hasToken, that
 * each one is reported with the complete literal text (encoding prefix and
 * delimiters included) and the type CxxTokenType.STRING.
 */
@Test
public void rawstring_literals() {
// NOTE(review): the next five inputs have no parenthesised body, which the
// "delimiter( raw_character* )delimiter" form requires. They look like
// superseded lines kept by the diff view, replaced by the five assertions
// that follow them - confirm before relying on them.
assertThat("raw string: empty", lexer.lex("R\"\""), hasToken("R\"\"", CxxTokenType.STRING));
assertThat("raw string: prefix u", lexer.lex("uR\"\""), hasToken("uR\"\"", CxxTokenType.STRING));
assertThat("raw string: prefix u8R", lexer.lex("u8R\"\""), hasToken("u8R\"\"", CxxTokenType.STRING));
assertThat("raw string: prefix UR", lexer.lex("UR\"\""), hasToken("UR\"\"", CxxTokenType.STRING));
assertThat("raw string: prefix LR", lexer.lex("LR\"\""), hasToken("LR\"\"", CxxTokenType.STRING));
// Empty delimiter, once without a prefix and once with each encoding prefix.
// NOTE(review): the first message says "empty" although the body is "...".
assertThat("raw string: empty", lexer.lex("R\"(...)\""), hasToken("R\"(...)\"", CxxTokenType.STRING));
assertThat("raw string: prefix u", lexer.lex("uR\"(...)\""), hasToken("uR\"(...)\"", CxxTokenType.STRING));
assertThat("raw string: prefix u8R", lexer.lex("u8R\"(...)\""), hasToken("u8R\"(...)\"", CxxTokenType.STRING));
assertThat("raw string: prefix UR", lexer.lex("UR\"(...)\""), hasToken("UR\"(...)\"", CxxTokenType.STRING));
assertThat("raw string: prefix LR", lexer.lex("LR\"(...)\""), hasToken("LR\"(...)\"", CxxTokenType.STRING));

// examples from the standard
// (examples 1 and 5 use the same input as the un-prefixed and LR cases above)
assertThat("raw string: std example 1", lexer.lex("R\"(...)\""), hasToken("R\"(...)\"", CxxTokenType.STRING));
assertThat("raw string: std example 2", lexer.lex("u8R\"**(...)**\""), hasToken("u8R\"**(...)**\"", CxxTokenType.STRING));
assertThat("raw string: std example 3", lexer.lex("uR\"*∼(...)*∼\""), hasToken("uR\"*∼(...)*∼\"", CxxTokenType.STRING));
assertThat("raw string: std example 4", lexer.lex("UR\"zzz(...)zzz\""), hasToken("UR\"zzz(...)zzz\"", CxxTokenType.STRING));
assertThat("raw string: std example 5", lexer.lex("LR\"(...)\""), hasToken("LR\"(...)\"", CxxTokenType.STRING));

// A backslash inside the raw body must be kept verbatim, not treated as an escape.
assertThat("raw string: an unescaped \\ character",
lexer.lex("R\"(An unescaped \\ character)\""), hasToken("R\"(An unescaped \\ character)\"", CxxTokenType.STRING));

// A double quote inside the raw body must not end the literal.
assertThat("raw string: an unescaped \" character",
lexer.lex("R\"(An unescaped \" character)\""), hasToken("R\"(An unescaped \" character)\"", CxxTokenType.STRING));

// The body itself is )" - only ")xyz" followed by a quote may close the literal.
assertThat("raw string: represent the string: )\"",
lexer.lex("R\"xyz()\")xyz\""), hasToken("R\"xyz()\")xyz\"", CxxTokenType.STRING));

// A complete inner raw string nested in the body; the outer X*X delimiter decides the end.
assertThat("raw string: complex example",
lexer.lex("R\"X*X(A C++11 raw string literal can be specified like this: R\"(This is my raw string)\" )X*X\""),
hasToken("R\"X*X(A C++11 raw string literal can be specified like this: R\"(This is my raw string)\" )X*X\"", CxxTokenType.STRING));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//@todo
//char* txt1 = u8R"XXX(I'm a "raw UTF-8" string.)XXX";
//char16_t* txt2 = uR"*(This is a "raw UTF-16" string.)*";
//char32_t* txt3 = UR"(This is a "raw UTF-32" string.)";
char* txt1 = u8R"XXX(I'm a "raw UTF-8" string.)XXX";
char16_t* txt2 = uR"*(This is a "raw UTF-16" string.)*";
char32_t* txt3 = UR"(This is a "raw UTF-32" string.)";
string s = R"X*X(A C++11 raw string literal can be specified like this: R"(This is my raw string)" )X*X";