diff --git a/Src/IronPython/Compiler/Tokenizer.cs b/Src/IronPython/Compiler/Tokenizer.cs index 07b0ed5e5..a5c1b5a05 100644 --- a/Src/IronPython/Compiler/Tokenizer.cs +++ b/Src/IronPython/Compiler/Tokenizer.cs @@ -735,11 +735,16 @@ private Token ReadNumber(int start) { } isPrefix0 = true; - while (NextChar('0')) { } // skip leading zeroes + // skip leading zeroes + while (true) { + NextChar('_'); + if (!NextChar('0')) break; + } } bool isFirstChar = true; while (true) { + NextChar('_'); int ch = NextChar(); switch (ch) { @@ -755,7 +760,7 @@ private Token ReadNumber(int start) { MarkTokenEnd(); // TODO: parse in place - return new ConstantValueToken(ParseInteger(GetTokenString(), 10)); + return new ConstantValueToken(ParseInteger(GetTokenSpan(), 10)); case 'j': case 'J': @@ -784,7 +789,7 @@ private Token ReadNumber(int start) { } // TODO: parse in place - return new ConstantValueToken(ParseInteger(GetTokenString(), 10)); + return new ConstantValueToken(ParseInteger(GetTokenSpan(), 10)); } isFirstChar = false; } @@ -795,8 +800,9 @@ private Token ReadBinaryNumber() { int iVal = 0; bool useBigInt = false; BigInteger bigInt = BigInteger.Zero; - bool first = true; + bool isFirstChar = true; while (true) { + NextChar('_'); int ch = NextChar(); switch (ch) { case '0': @@ -812,7 +818,7 @@ private Token ReadBinaryNumber() { bigInt = (BigInteger)iVal; } - if (bits >= 32) { + if (useBigInt) { bigInt = (bigInt << 1) | (ch - '0'); } else { iVal = iVal << 1 | (ch - '0'); @@ -822,22 +828,21 @@ private Token ReadBinaryNumber() { BufferBack(); MarkTokenEnd(); - if (first) { - ReportSyntaxError( - new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1), - BufferTokenEnd), - Resources.InvalidToken, ErrorCodes.SyntaxError); + if (isFirstChar) { + var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1); + ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError); } return new ConstantValueToken(useBigInt ? bigInt : (object)iVal); } - first = false; + isFirstChar = false; } } private Token ReadOctalNumber() { - bool first = true; + bool isFirstChar = true; while (true) { + NextChar('_'); int ch = NextChar(); switch (ch) { @@ -855,23 +860,24 @@ private Token ReadOctalNumber() { BufferBack(); MarkTokenEnd(); - if (first) { - ReportSyntaxError( - new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1), - BufferTokenEnd), - Resources.InvalidToken, ErrorCodes.SyntaxError); + if (isFirstChar) { + var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1); + ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError); } // TODO: parse in place - return new ConstantValueToken(ParseInteger(GetTokenSubstring(2), 8)); + var span = GetTokenSpan().Slice(2); + if (!span.IsEmpty && span[0] == '_') span = span.Slice(1); + return new ConstantValueToken(ParseInteger(span, 8)); } - first = false; + isFirstChar = false; } } private Token ReadHexNumber() { - bool first = true; + bool isFirstChar = true; while (true) { + NextChar('_'); int ch = NextChar(); switch (ch) { @@ -903,17 +909,17 @@ private Token ReadHexNumber() { BufferBack(); MarkTokenEnd(); - if (first) { - ReportSyntaxError( - new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1), - BufferTokenEnd), - Resources.InvalidToken, ErrorCodes.SyntaxError); + if (isFirstChar) { + var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1); + ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError); } // TODO: parse in place - return new ConstantValueToken(ParseInteger(GetTokenSubstring(2), 16)); + var span = GetTokenSpan().Slice(2); + if (!span.IsEmpty && span[0] == '_') span = span.Slice(1); + return new ConstantValueToken(ParseInteger(span, 16)); } - first = false; + isFirstChar = false; } } @@ -1431,10 +1437,8 @@ private void SetIndent(int spaces, StringBuilder chars) { current = DoDedent(spaces, current); if (spaces != current) { - ReportSyntaxError( - new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1), - BufferTokenEnd), - Resources.IndentationMismatch, ErrorCodes.IndentationError); + var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1); + ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.IndentationMismatch, ErrorCodes.IndentationError); } } } @@ -1448,12 +1452,11 @@ private int DoDedent(int spaces, int current) { return current; } - private object ParseInteger(string s, int radix) { - try { - return LiteralParser.ParseInteger(s, radix); - } catch (ArgumentException e) { - ReportSyntaxError(BufferTokenSpan, e.Message, ErrorCodes.SyntaxError); + private object ParseInteger(ReadOnlySpan s, int radix) { + if (LiteralParser.TryParseIntegerSign(s, radix, out object result)) { + return result; } + ReportSyntaxError(BufferTokenSpan, "invalid token", ErrorCodes.SyntaxError); return ScriptingRuntimeHelpers.Int32ToObject(0); } @@ -1673,6 +1676,9 @@ private string GetTokenSubstring(int offset, int length) { return new String(_buffer, _start + offset, length); } + private ReadOnlySpan GetTokenSpan() + => _buffer.AsSpan(_start, _tokenEnd - _start); + [Conditional("DEBUG")] [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1822:MarkMembersAsStatic")] private void CheckInvariants() { diff --git a/Src/IronPython/Runtime/LiteralParser.cs b/Src/IronPython/Runtime/LiteralParser.cs index 9d12f8646..8077f9924 100644 --- a/Src/IronPython/Runtime/LiteralParser.cs +++ b/Src/IronPython/Runtime/LiteralParser.cs @@ -155,10 +155,10 @@ private static void HandleEscape(ReadOnlySpan data, ref int i, StringBuild case '6': case '7': { val = ch - '0'; - if (i < length && HexValue(data[i].ToChar(null), out int onechar) && onechar < 8) { + if (i < length && TryConvertDigit(data[i].ToChar(null), 8, out int onechar)) { val = val * 8 + onechar; i++; - if (i < length && HexValue(data[i].ToChar(null), out onechar) && onechar < 8) { + if (i < length && TryConvertDigit(data[i].ToChar(null), 8, out onechar)) { val = val * 8 + onechar; i++; } @@ -562,10 +562,10 @@ internal static List ParseBytes(ReadOnlySpan data, bool isRaw, bool case '6': case '7': { val = ch - '0'; - if (i < length && HexValue(data[i].ToChar(null), out int onechar) && onechar < 8) { + if (i < length && TryConvertDigit(data[i].ToChar(null), 8, out int onechar)) { val = val * 8 + onechar; i++; - if (i < length && HexValue(data[i].ToChar(null), out onechar) && onechar < 8) { + if (i < length && TryConvertDigit(data[i].ToChar(null), 8, out onechar)) { val = val * 8 + onechar; i++; } @@ -598,7 +598,7 @@ internal static List ParseBytes(ReadOnlySpan data, bool isRaw, bool return buf; } - private static bool HexValue(char ch, out int value) { + private static bool TryConvertDigit(char ch, int b, out int value) { switch (ch) { case '0': case '\x660': value = 0; break; @@ -631,52 +631,13 @@ private static bool HexValue(char ch, out int value) { } break; } - return true; - } - - private static int HexValue(char ch) { - int value; - if (!HexValue(ch, out value)) { - throw new ValueErrorException("bad char for integer value: " + ch); - } - return value; - } - - private static int CharValue(char ch, int b) { - int val = HexValue(ch); - if (val >= b) { - throw new ValueErrorException(String.Format("bad char for the integer value: '{0}' (base {1})", ch, b)); - } - return val; - } - - private static bool ParseInt(string text, int b, out int ret) { - ret = 0; - long m = 1; - for (int i = text.Length - 1; i >= 0; i--) { - var ch = text[i]; - - // avoid the exception here. Not only is throwing it expensive, - // but loading the resources for it is also expensive - long lret = ret + m * CharValue(ch, b); - if (int.MinValue <= lret && lret <= int.MaxValue) { - ret = (int)lret; - } else { - return false; - } - - m *= b; - if (int.MinValue > m || m > int.MaxValue) { - return false; - } - } - return true; + return value < b; } private static bool TryParseInt(in ReadOnlySpan text, int start, int length, int b, out int value, out int consumed) where T : IConvertible { value = 0; for (int i = start, end = start + length; i < end; i++) { - if (i < text.Length && HexValue(text[i].ToChar(null), out int onechar) && onechar < b) { + if (i < text.Length && TryConvertDigit(text[i].ToChar(null), b, out int onechar)) { value = value * b + onechar; } else { consumed = i - start; @@ -688,15 +649,10 @@ private static bool TryParseInt(in ReadOnlySpan text, int start, int lengt } public static object ParseInteger(string text, int b) { - Debug.Assert(b != 0); - int iret; - if (!ParseInt(text, b, out iret)) { - BigInteger ret = ParseBigInteger(text, b); - if (!ret.AsInt32(out iret)) { - return ret; - } + if (TryParseInteger(text.AsSpan(), b, false, out object val)) { + return val; } - return ScriptingRuntimeHelpers.Int32ToObject(iret); + throw new ValueErrorException($"invalid literal with base {b}: {text}"); } internal static bool TryParseIntegerSign(ReadOnlySpan text, int b, out object val) { @@ -706,7 +662,7 @@ internal static bool TryParseIntegerSign(ReadOnlySpan text, int b, out obj text = text.Trim(); - if (TryParseIntegerStart(text, ref b, out int sign, out int consumed)) { + if (TryParseIntegerStart(text, ref b, out bool isNegative, out int consumed)) { text = text.Slice(consumed); } else { val = default; @@ -715,11 +671,28 @@ internal static bool TryParseIntegerSign(ReadOnlySpan text, int b, out obj Debug.Assert(!text.IsEmpty); + return TryParseInteger(text, b, isNegative, out val); + } + + private static bool TryParseInteger(ReadOnlySpan text, int b, bool isNegative, out object val) { long ret = 0; + int underscore = 1; for (int i = 0; i < text.Length; i++) { var ch = text[i]; - if (!HexValue(ch, out int digit) || !(digit < b)) { + + if (ch == '_') { + underscore++; + if (underscore > 1) { + val = default; + return false; + } + continue; + } else { + underscore = 0; + } + + if (!TryConvertDigit(ch, b, out int digit)) { val = default; return false; } @@ -728,17 +701,55 @@ internal static bool TryParseIntegerSign(ReadOnlySpan text, int b, out obj if (ret > int.MaxValue) { BigInteger retBi = ret; + + // Repeated integer multiplication is expensive so use a grouping strategy. + // We pick group sizes that ensure our numbers stay in the Int32 range. + int groupMax = 5; // zzzzzz (base 36) = 2_176_782_335 > int.MaxValue + if (b <= 10) groupMax = 9; // 2_147_483_647 + + int buffer = 0; + int cnt = 0; + int smallMult = 1; + for (i++; i < text.Length; i++) { ch = text[i]; - if (!HexValue(ch, out digit) || !(digit < b)) { + + if (ch == '_') { + underscore++; + if (underscore > 1) { + val = default; + return false; + } + continue; + } else { + underscore = 0; + } + + if (!TryConvertDigit(ch, b, out digit)) { val = default; return false; } - retBi = retBi * b + digit; + buffer = buffer * b + digit; + cnt++; + smallMult *= b; + + Debug.Assert(smallMult > 0); // no overflows! + + if (cnt >= groupMax) { + retBi = retBi * smallMult + buffer; + // reset buffer + buffer = 0; + cnt = 0; + smallMult = 1; + } + } + + if (cnt > 0) { + retBi = retBi * smallMult + buffer; } - if (sign < 0) { + if (isNegative) { if (retBi == (BigInteger)int.MaxValue + 1) { val = ScriptingRuntimeHelpers.Int32ToObject(int.MinValue); return true; @@ -747,20 +758,30 @@ internal static bool TryParseIntegerSign(ReadOnlySpan text, int b, out obj return true; } + if (underscore != 0) { + val = default; + return false; + } + val = retBi; return true; } } + if (underscore != 0) { + val = default; + return false; + } + int res = unchecked((int)ret); - res = sign < 0 ? -res : res; + res = isNegative ? -res : res; val = ScriptingRuntimeHelpers.Int32ToObject(res); return true; } - private static bool TryParseIntegerStart(ReadOnlySpan text, ref int b, out int sign, out int consumed) { + private static bool TryParseIntegerStart(ReadOnlySpan text, ref int b, out bool isNegative, out int consumed) { // set defaults - sign = 1; + isNegative = false; consumed = 0; if (text.IsEmpty) return false; @@ -774,7 +795,7 @@ private static bool TryParseIntegerStart(ReadOnlySpan text, ref int b, out // sign? switch (text[start]) { case '-': - sign = -1; + isNegative = true; if (++start >= end) return false; break; case '+': @@ -805,6 +826,9 @@ private static bool TryParseIntegerStart(ReadOnlySpan text, ref int b, out return true; } if (++start >= end) return false; + if (text[start] == '_') { + if (++start >= end) return false; + } } else { b = 10; } @@ -814,35 +838,6 @@ private static bool TryParseIntegerStart(ReadOnlySpan text, ref int b, out return true; } - internal static BigInteger ParseBigInteger(string text, int b) { - Debug.Assert(b != 0); - BigInteger ret = BigInteger.Zero; - BigInteger m = BigInteger.One; - - int i = text.Length - 1; - - int groupMax = 7; - if (b <= 10) groupMax = 9;// 2 147 483 647 - - while (i >= 0) { - // extract digits in a batch - int smallMultiplier = 1; - uint uval = 0; - - for (int j = 0; j < groupMax && i >= 0; j++) { - var ch = text[i--]; - uval = (uint)(CharValue(ch, b) * smallMultiplier + uval); - smallMultiplier *= b; - } - - // this is more generous than needed - ret += m * (BigInteger)uval; - if (i >= 0) m = m * (smallMultiplier); - } - - return ret; - } - internal static bool TryParseFloat(string text, out double res, bool replaceUnicode) { try { // @@ -895,7 +890,7 @@ private static double ParseFloatNoCatch(string text, bool replaceUnicode = true) try { res = double.Parse(s, NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture); } catch (OverflowException) { - res = text.lstrip().StartsWith("-", StringComparison.Ordinal) ? Double.NegativeInfinity : Double.PositiveInfinity; + res = text.lstrip().StartsWith("-", StringComparison.Ordinal) ? double.NegativeInfinity : double.PositiveInfinity; } return (res == 0.0 && text.lstrip().StartsWith("-", StringComparison.Ordinal)) ? DoubleOps.NegativeZero : res; } @@ -970,7 +965,7 @@ public static Complex ParseComplex(string s) { imag += "1"; // convert +/- to +1/-1 } - return new Complex(String.IsNullOrEmpty(real) ? 0 : ParseFloat(real), ParseFloat(imag)); + return new Complex(string.IsNullOrEmpty(real) ? 0 : ParseFloat(real), ParseFloat(imag)); } else { throw ExnMalformed(); } @@ -986,7 +981,7 @@ public static Complex ParseImaginary(string text) { System.Globalization.CultureInfo.InvariantCulture.NumberFormat )); } catch (OverflowException) { - return new Complex(0, Double.PositiveInfinity); + return new Complex(0, double.PositiveInfinity); } } } diff --git a/Src/IronPython/Runtime/Operations/FloatOps.cs b/Src/IronPython/Runtime/Operations/FloatOps.cs index 30a0bad6f..262eed1e7 100644 --- a/Src/IronPython/Runtime/Operations/FloatOps.cs +++ b/Src/IronPython/Runtime/Operations/FloatOps.cs @@ -162,7 +162,8 @@ public static object fromhex(CodeContext/*!*/ context, PythonType/*!*/ cls, stri BigInteger intVal; if (integer.Success) { - intVal = LiteralParser.ParseBigInteger(integer.Value, 16); + // prefix with 0 to get positive number + intVal = BigInteger.Parse("0" + integer.Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture); } else { intVal = BigInteger.Zero; } diff --git a/Src/IronPythonTest/Cases/CPythonCasesManifest.ini b/Src/IronPythonTest/Cases/CPythonCasesManifest.ini index 301a1838f..35ddbe278 100644 --- a/Src/IronPythonTest/Cases/CPythonCasesManifest.ini +++ b/Src/IronPythonTest/Cases/CPythonCasesManifest.ini @@ -1197,8 +1197,3 @@ Ignore=true [CPython.unittest.test_suite] RunCondition=NOT $(IS_MONO) # TODO: debug - -# -------------------- The following worked in 3.4 --------------------------- - -[CPython.test_int] -Ignore=true # blocked by https://github.com/IronLanguages/ironpython3/issues/105