Skip to content

Commit

Permalink
#223 #227 Fix lexing of rb'' and rf'' python strings
Browse files Browse the repository at this point in the history
  • Loading branch information
jpe authored and nyamatongwe committed Feb 2, 2024
1 parent afaef24 commit 4e05c9d
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 8 deletions.
5 changes: 5 additions & 0 deletions doc/LexillaHistory.html
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,11 @@ <h3>
<a href="https://github.com/ScintillaOrg/lexilla/issues/94">Issue #94</a>.
</li>
<li>
Python: fix lexing of rb'' and rf'' strings.
<a href="https://github.com/ScintillaOrg/lexilla/issues/223">Issue #223</a>,
<a href="https://github.com/ScintillaOrg/lexilla/pull/227">Pull request #227</a>.
</li>
<li>
Ruby: fix lexing of methods on numeric literals like '3.times' so the '.' and method name do not appear in numeric style.
<a href="https://github.com/ScintillaOrg/lexilla/issues/225">Issue #225</a>.
</li>
Expand Down
32 changes: 24 additions & 8 deletions lexers/LexPython.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ constexpr bool IsRawPrefix(int ch) {
}

bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) noexcept {
// To cover both Python 2 and Python 3, string prefixes are lexed as follows:
// ur'' is a string, but ru'' is not
// fr'', rf'', br'', rb'' are all strings
if (IsQuote(ch))
return true;
if (IsPyStringTypeChar(ch, allowed)) {
Expand All @@ -94,8 +97,12 @@ bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) n
if (IsRawPrefix(chNext) && IsQuote(chNext2))
return true;
}
if (IsRawPrefix(ch) && IsQuote(chNext))
return true;
if (IsRawPrefix(ch)) {
if (IsQuote(chNext))
return true;
if (IsPyStringTypeChar(chNext, allowed) && !AnyOf(chNext, 'u', 'U') && IsQuote(chNext2))
return true;
}

return false;
}
Expand Down Expand Up @@ -149,14 +156,22 @@ int PopFromStateStack(std::vector<SingleFStringExpState> &stack, SingleFStringEx
int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex, literalsAllowed allowed) {
char ch = styler.SafeGetCharAt(i);
char chNext = styler.SafeGetCharAt(i + 1);
const int firstIsF = (ch == 'f' || ch == 'F');
bool isFString = false;

// Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
// Advance beyond r, a type char, or both (in either order)
// Note that this depends on IsPyStringStart to enforce ru'' not being a string
if (IsRawPrefix(ch)) {
i++;
if (IsPyStringTypeChar(chNext, allowed)) {
if (AnyOf(chNext, 'f', 'F'))
isFString = true;
i++;
}
ch = styler.SafeGetCharAt(i);
chNext = styler.SafeGetCharAt(i + 1);
} else if (IsPyStringTypeChar(ch, allowed)) {
if (AnyOf(ch, 'f', 'F'))
isFString = true;
if (IsRawPrefix(chNext))
i += 2;
else
Expand All @@ -165,6 +180,7 @@ int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex,
chNext = styler.SafeGetCharAt(i + 1);
}

// ch and i will be the first quote
if (!IsQuote(ch)) {
*nextIndex = i + 1;
return SCE_P_DEFAULT;
Expand All @@ -174,16 +190,16 @@ int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex,
*nextIndex = i + 3;

if (ch == '"')
return (firstIsF ? SCE_P_FTRIPLEDOUBLE : SCE_P_TRIPLEDOUBLE);
return (isFString ? SCE_P_FTRIPLEDOUBLE : SCE_P_TRIPLEDOUBLE);
else
return (firstIsF ? SCE_P_FTRIPLE : SCE_P_TRIPLE);
return (isFString ? SCE_P_FTRIPLE : SCE_P_TRIPLE);
} else {
*nextIndex = i + 1;

if (ch == '"')
return (firstIsF ? SCE_P_FSTRING : SCE_P_STRING);
return (isFString ? SCE_P_FSTRING : SCE_P_STRING);
else
return (firstIsF ? SCE_P_FCHARACTER : SCE_P_CHARACTER);
return (isFString ? SCE_P_FCHARACTER : SCE_P_CHARACTER);
}
}

Expand Down
16 changes: 16 additions & 0 deletions test/examples/python/strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Simple raw string
r''

# Raw f-string
rf''
fr''

# Raw byte string
rb''
br''

# Raw unicode strings: ur'' is valid in 2.7 (but not in 3) -- always lexed as
# valid; ru'' is never valid
ru''
ur''

17 changes: 17 additions & 0 deletions test/examples/python/strings.py.folded
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
0 400 0 # Simple raw string
0 400 0 r''
1 400 0
0 400 0 # Raw f-string
0 400 0 rf''
0 400 0 fr''
1 400 0
0 400 0 # Raw byte string
0 400 0 rb''
0 400 0 br''
1 400 0
0 400 0 # Raw unicode strings: ur'' is valid in 2.7 (but not in 3) -- always lexed as
0 400 0 # valid; ru'' is never valid
0 400 0 ru''
0 400 0 ur''
1 400 0
1 400 0
16 changes: 16 additions & 0 deletions test/examples/python/strings.py.styled
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{1}# Simple raw string{0}
{4}r''{0}

{1}# Raw f-string{0}
{17}rf''{0}
{17}fr''{0}

{1}# Raw byte string{0}
{4}rb''{0}
{4}br''{0}

{1}# Raw unicode strings: ur'' is valid in 2.7 (but not in 3) -- always lexed as{0}
{1}# valid; ru'' is never valid{0}
{11}ru{4}''{0}
{4}ur''{0}

0 comments on commit 4e05c9d

Please sign in to comment.