Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New sprintf implementation #7612

Draft
wants to merge 41 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
78944ef
Beginnings of a new sprintf parser/interpreter.
enebo Oct 14, 2021
0ddff4d
Remove extra printlns
enebo Oct 14, 2021
762f06d
\ is needed in Java String but not in bytelists
enebo Oct 14, 2021
289b71a
Make spec:ruby:fast run again
enebo Oct 14, 2021
978dcc5
fix '%%'. bytetoken only should return true
enebo Oct 14, 2021
ef223bb
Sketchy fix for %%d -> %d for printf
enebo Oct 14, 2021
25abce1
%.5d was not zero padding
enebo Oct 14, 2021
09dccb9
Fix parsing of named params in sprintf.
enebo Oct 14, 2021
e09cce9
Support positional index in sprintf
enebo Oct 14, 2021
e4b287d
Dual using token.width is confusing. Just make another field.
enebo Oct 14, 2021
03b3eab
Allow <name> to accept additional formatting.
enebo Oct 14, 2021
b9af53e
Implement %bBoxX.
enebo Oct 15, 2021
98fa357
Change some visibility on token fields.
enebo Oct 15, 2021
785d657
Fix assumed ordering of indexed arguments.
enebo Oct 17, 2021
0b44a2a
Made mistake in last commit in naming.
enebo Oct 17, 2021
03f2102
Fix first long-standing bug in spec/ruby for sprintf.
enebo Oct 17, 2021
25125b2
Fix another long standing problem. mixed error now thrown.
enebo Oct 17, 2021
2e7042e
Fix most remaining issues in test:mri except a single oddity involving…
enebo Oct 19, 2021
c4a9b42
Fix last precision issues?
enebo Oct 19, 2021
e45beef
Another fix for precision.
enebo Oct 19, 2021
210c522
Reverse SPRINTF env to run new sprintf if unset.
enebo Oct 22, 2021
34854aa
"%1" was an infinite loop
enebo Oct 24, 2021
9d1a38a
Some more literal '%' cases
enebo Oct 24, 2021
39c73c7
Fixes '%+.2d' zero padding
enebo Oct 24, 2021
fb6e223
Fixes '%-03d' should prefer '-' over '0'.
enebo Oct 24, 2021
5dc668c
Add in too few args error checking
enebo Oct 24, 2021
4b1a9d9
HEH...only run too many args check once
enebo Oct 25, 2021
396cd98
Some more error handling in parser
enebo Oct 26, 2021
95f5fd2
Merge remote-tracking branch 'origin/master' into new_sprintf
enebo Oct 26, 2021
a67836f
Change parser to not manage as much state variables
enebo Nov 5, 2021
70deb32
Merge branch 'jruby-9.3' into new_sprintf
enebo Nov 18, 2021
37f2374
new_sprintf WIP: support for %c failing tests
PurityLake Oct 21, 2021
44a34c3
new_sprintf WIP: Fixed failing tests and allowed for encoding
PurityLake Oct 21, 2021
15e976f
new_sprintf WIP: added support for %s
PurityLake Oct 26, 2021
6afdae9
Land %c/%p support
enebo Nov 18, 2021
411d3ad
Explicit width can mark rightpad at parse time.
enebo Nov 23, 2021
73116ae
More test_sprintf_comb.rb fixes
enebo Dec 2, 2021
0298a8e
Tests where we give exact results without precision loss.
enebo Dec 2, 2021
754a32f
Go back to what we had for zero pad logic
enebo Dec 3, 2021
dd6e128
Try moving out accessing fields from actual layout of processing instrs
enebo Dec 17, 2021
049d4ee
Remove last known %d error
enebo Dec 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 5 additions & 4 deletions core/src/main/java/org/jruby/RubyFloat.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.jruby.util.Numeric;
import org.jruby.util.Sprintf;

import static org.jruby.util.CommonByteLists.*;
import static org.jruby.util.Numeric.f_abs;
import static org.jruby.util.Numeric.f_add;
import static org.jruby.util.Numeric.f_expt;
Expand Down Expand Up @@ -264,14 +265,14 @@ public IRubyObject to_s() {
// Under 1.9, use full-precision float formatting (JRUBY-4846).
// Double-precision can represent around 16 decimal digits;
// we use 20 to ensure full representation.
Sprintf.sprintf(buf, Locale.US, "%#.20g", this);
Sprintf.sprintfUS(runtime, buf, G20, new IRubyObject[] {this});
int e = buf.indexOf('e');
if (e == -1) e = buf.getRealSize();
ASCIIEncoding ascii = ASCIIEncoding.INSTANCE;

if (!ascii.isDigit(buf.get(e - 1))) {
buf.setRealSize(0);
Sprintf.sprintf(buf, Locale.US, "%#.14e", this);
Sprintf.sprintfUS(runtime, buf, E14, new IRubyObject[] {this});
e = buf.indexOf('e');
if (e == -1) e = buf.getRealSize();
}
Expand Down Expand Up @@ -1169,13 +1170,13 @@ public IRubyObject finite_p() {
return runtime.getTrue();
}


/**
 * Builds the byte representation of this float's value (presumably the form
 * written when the float is marshalled -- inferred from the method name only).
 *
 * Infinite and NaN values are mapped to shared constant byte lists; every
 * other value is formatted into a fresh {@link ByteList} with a 17-significant-digit
 * {@code %g} format.
 *
 * @return the constant for +/-Infinity or NaN, otherwise the formatted digits
 */
private ByteList marshalDump() {
// Non-finite values short-circuit to constants; the sign selects which infinity.
if (Double.isInfinite(value)) return value < 0 ? NEGATIVE_INFINITY_BYTELIST : INFINITY_BYTELIST;
if (Double.isNaN(value)) return NAN_BYTELIST;

ByteList byteList = new ByteList();
// Always use US locale, to ensure "." separator. JRUBY-5918
// NOTE(review): the next two statements appear to be the removed and the added
// side of one diff line (old String-format API vs. new ByteList-format API);
// only one of them is expected to exist in the real file.
Sprintf.sprintf(byteList, Locale.US, "%.17g", RubyArray.newArray(getRuntime(), this));
Sprintf.sprintfUS(getRuntime(), byteList, G17, new IRubyObject[] { this });
return byteList;
}

Expand Down
18 changes: 7 additions & 11 deletions core/src/main/java/org/jruby/RubyKernel.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;

import jnr.constants.platform.Errno;
Expand Down Expand Up @@ -82,6 +83,7 @@
import org.jruby.util.ByteList;
import org.jruby.util.ConvertBytes;
import org.jruby.util.ShellLauncher;
import org.jruby.util.Sprintf;
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;
import org.jruby.util.cli.Options;
Expand All @@ -107,6 +109,7 @@
import static org.jruby.runtime.Visibility.PRIVATE;
import static org.jruby.runtime.Visibility.PROTECTED;
import static org.jruby.runtime.Visibility.PUBLIC;
import static org.jruby.util.Sprintf.sprintfUS;

/**
* Note: For CVS history, see KernelModule.java.
Expand Down Expand Up @@ -855,18 +858,11 @@ public static IRubyObject sprintf(ThreadContext context, IRubyObject recv, IRuby
throw context.runtime.newArgumentError("sprintf must have at least one argument");
}

RubyString str = RubyString.stringValue(args[0]);
RubyString formatString = RubyString.stringValue(args[0]);
IRubyObject[] newArgs = new IRubyObject[args.length - 1];
System.arraycopy(args, 1, newArgs, 0, args.length - 1);

IRubyObject arg;
if (args.length == 2 && args[1] instanceof RubyHash) {
arg = args[1];
} else {
RubyArray newArgs = RubyArray.newArrayMayCopy(context.runtime, args);
newArgs.shift(context);
arg = newArgs;
}

return str.op_format(context, arg);
return sprintfUS(context.runtime, formatString, newArgs);
}

@Deprecated
Expand Down
7 changes: 5 additions & 2 deletions core/src/main/java/org/jruby/RubyNameError.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
import org.jruby.util.ByteList;
import org.jruby.util.Sprintf;

import java.util.Locale;

/**
* The Java representation of a Ruby NameError.
*
Expand Down Expand Up @@ -141,10 +143,11 @@ public IRubyObject to_str(ThreadContext context) {

// RubyString name = this.name.asString(); // Symbol -> String

RubyArray arr = RubyArray.newArray(runtime, this.name, description, separator, className);
IRubyObject[] arr = new IRubyObject[] {this.name, description, separator, className };

ByteList msgBytes = new ByteList(message.length() + description.size() + 16); // name.size()
Sprintf.sprintf(msgBytes, message, arr);
ByteList msg = ByteList.create(message); // FIXME: RubyNameError.message should be bytelist
Sprintf.sprintf(runtime, msgBytes, msg, arr);

return runtime.newString(msgBytes);
}
Expand Down
78 changes: 28 additions & 50 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
Expand Up @@ -91,25 +91,9 @@
import static org.jruby.RubyEnumerator.enumeratorizeWithSize;
import static org.jruby.anno.FrameField.BACKREF;
import static org.jruby.runtime.Visibility.PRIVATE;
import static org.jruby.util.StringSupport.CR_7BIT;
import static org.jruby.util.StringSupport.CR_BROKEN;
import static org.jruby.util.StringSupport.CR_MASK;
import static org.jruby.util.StringSupport.CR_UNKNOWN;
import static org.jruby.util.StringSupport.CR_VALID;
import static org.jruby.util.StringSupport.MBCLEN_CHARFOUND_LEN;
import static org.jruby.util.StringSupport.MBCLEN_CHARFOUND_P;
import static org.jruby.util.StringSupport.MBCLEN_INVALID_P;
import static org.jruby.util.StringSupport.MBCLEN_NEEDMORE_P;
import static org.jruby.util.StringSupport.codeLength;
import static org.jruby.util.StringSupport.codePoint;
import static org.jruby.util.StringSupport.codeRangeScan;
import static org.jruby.util.StringSupport.encFastMBCLen;
import static org.jruby.util.StringSupport.isSingleByteOptimizable;
import static org.jruby.util.StringSupport.memsearch;
import static org.jruby.util.StringSupport.memchr;
import static org.jruby.util.StringSupport.nth;
import static org.jruby.util.StringSupport.offset;
import static org.jruby.util.StringSupport.searchNonAscii;
import static org.jruby.util.CommonByteLists.X2X;
import static org.jruby.util.Sprintf.sprintfUS;
import static org.jruby.util.StringSupport.*;

/**
* Implementation of Ruby String class
Expand Down Expand Up @@ -1242,27 +1226,19 @@ private RubyString multiplyByteList(ThreadContext context, IRubyObject arg) {

/**
 * Ruby-level {@code %} operator on strings (bound through the
 * {@code name = "%"} annotation): uses this string as a format and
 * {@code arg} as the source of format arguments.
 *
 * NOTE(review): this span is rendered from a diff, so removed and added lines
 * appear together ({@code tmp} is declared twice and two {@code return}
 * statements follow each other). The pre-change path ends in
 * {@code Sprintf.sprintf1_9(...)} / {@code return str;}; the post-change path
 * ends in {@code return sprintfUS(...)}. Confirm against the real file.
 *
 * @param context current thread context; its runtime is handed to the formatter
 * @param arg a hash, an object convertible to an array, or a single value
 * @return the formatted string
 */
@JRubyMethod(name = "%", required = 1)
public RubyString op_format(ThreadContext context, IRubyObject arg) {
IRubyObject tmp;
IRubyObject[] args;
// A hash is kept whole and passed as the one and only argument.
if (arg instanceof RubyHash) {
tmp = arg;
args = new IRubyObject[] { arg };
} else {
// Try an array conversion; a nil result means arg is used as a single argument.
tmp = arg.checkArrayType();
if (tmp.isNil()) tmp = arg;
IRubyObject tmp = arg.checkArrayType();
if (tmp.isNil()) {
args = new IRubyObject[] { arg };
} else {
args = ((RubyArray) tmp).toJavaArray(); // Is this broken with object shaping?
}
}

// Output buffer starts at the format's size and inherits its encoding.
ByteList out = new ByteList(value.getRealSize());
out.setEncoding(value.getEncoding());

boolean tainted;

// FIXME: Should we make this work with platform's locale,
// or continue hardcoding US?
tainted = Sprintf.sprintf1_9(out, Locale.US, value, tmp);

RubyString str = newString(context.runtime, out);

// Result is tainted if either the formatting step or this string reports taint.
str.setTaint(tainted || isTaint());
return str;
return sprintfUS(context.runtime, this, args);
}

@JRubyMethod
Expand Down Expand Up @@ -2416,7 +2392,7 @@ public static IRubyObject rbStrEscape(ThreadContext context, RubyString str) {
n = (int)(pend - p);
while ((n--) > 0) {
result.modify();
Sprintf.sprintf(runtime, result.getByteList(), "\\x%02X", pBytes[p] & 0377);
Sprintf.sprintfLong(runtime, result.getByteList(), X2X, pBytes[p] & 0377);
prev = ++p;
}
continue;
Expand Down Expand Up @@ -2446,7 +2422,7 @@ else if (asciicompat && Encoding.isAscii(c) && (c < 0x7F && c > 31 /*ISPRINT(c)*
else {
if (p - n > prev) result.cat(pBytes, prev, p - n - prev);
result.modify();
Sprintf.sprintf(runtime, result.getByteList(), StringSupport.escapedCharFormat(c, unicode_p), (c & 0xFFFFFFFFL));
Sprintf.sprintfLong(runtime, result.getByteList(), escapedFormat(c, unicode_p), c & 0xFFFFFFFFL);
prev = p;
}
}
Expand Down Expand Up @@ -2493,7 +2469,7 @@ public static RubyString inspect(final Ruby runtime, ByteList byteList) {
if (end < p + n) n = end - p;
while (n-- > 0) {
result.modifyExpand(result.size() + 4);
Sprintf.sprintf(runtime, result.getByteList() ,"\\x%02X", bytes[p] & 0377);
Sprintf.sprintfLong(runtime, result.getByteList(), X2X, bytes[p] & 0377);
prev = ++p;
}
continue;
Expand Down Expand Up @@ -2544,7 +2520,7 @@ public static RubyString inspect(final Ruby runtime, ByteList byteList) {
continue;
} else {
if (p - n > prev) result.cat(bytes, prev, p - n - prev);
Sprintf.sprintf(runtime, result.getByteList() , StringSupport.escapedCharFormat(c, isUnicode), (c & 0xFFFFFFFFL));
Sprintf.sprintfLong(runtime, result.getByteList(), escapedFormat(c, isUnicode), c & 0xFFFFFFFFL);
prev = p;
continue;
}
Expand Down Expand Up @@ -4069,6 +4045,8 @@ final IRubyObject uptoCommon(ThreadContext context, IRubyObject arg, boolean exc
return uptoCommon(context, arg.convertToString(), excl, block, false);
}

// The format bytes "%.*d". It is used in this file with a two-element argument
// array: value.length() first, then the integer to format (presumably '*' takes
// the precision from that first argument, as in standard sprintf -- confirm).
private static final ByteList PSTARD = new ByteList(new byte[] {'%', '.', '*', 'd'});

final IRubyObject uptoCommon(ThreadContext context, RubyString end, boolean excl, Block block, boolean asSymbol) {
final Ruby runtime = context.runtime;

Expand Down Expand Up @@ -4108,17 +4086,17 @@ final IRubyObject uptoCommon(ThreadContext context, RubyString end, boolean excl
IRubyObject b = stringToInum(10);
IRubyObject e = end.stringToInum(10);

RubyArray argsArr = RubyArray.newArray(runtime, RubyFixnum.newFixnum(runtime, value.length()), context.nil);
IRubyObject[] argsArr = new IRubyObject[] { RubyFixnum.newFixnum(runtime, value.length()), context.nil };

if (b instanceof RubyFixnum && e instanceof RubyFixnum) {
long bl = RubyNumeric.fix2long(b);
long el = RubyNumeric.fix2long(e);

while (bl <= el) {
if (excl && bl == el) break;
argsArr.eltSetOk(1, RubyFixnum.newFixnum(runtime, bl));
argsArr[1] = RubyFixnum.newFixnum(runtime, bl);
ByteList to = new ByteList(value.length() + 5);
Sprintf.sprintf(to, "%.*d", argsArr);
Sprintf.sprintf(runtime, to, PSTARD, argsArr);
RubyString str = RubyString.newStringNoCopy(runtime, to, USASCIIEncoding.INSTANCE, CR_7BIT);
block.yield(context, asSymbol ? runtime.newSymbol(str.toString()) : str);
bl++;
Expand All @@ -4128,9 +4106,9 @@ final IRubyObject uptoCommon(ThreadContext context, RubyString end, boolean excl
CallSite op = excl ? sites.op_lt : sites.op_le;

while (op.call(context, b, b, e).isTrue()) {
argsArr.eltSetOk(1, b);
argsArr[1] = b;
ByteList to = new ByteList(value.length() + 5);
Sprintf.sprintf(to, "%.*d", argsArr);
Sprintf.sprintf(runtime, to, PSTARD, argsArr);
RubyString str = RubyString.newStringNoCopy(runtime, to, USASCIIEncoding.INSTANCE, CR_7BIT);
block.yield(context, asSymbol ? runtime.newSymbol(str.toString()) : str);
b = sites.succ.call(context, b, b);
Expand Down Expand Up @@ -4173,24 +4151,24 @@ final IRubyObject uptoEndless(ThreadContext context, Block block) {

if (isAscii && ASCII.isDigit(value.getUnsafeBytes()[value.getBegin()])) {
IRubyObject b = stringToInum(10);
RubyArray argsArr = RubyArray.newArray(runtime, RubyFixnum.newFixnum(runtime, value.length()), context.nil);
IRubyObject[] argsArr = new IRubyObject[] { RubyFixnum.newFixnum(runtime, value.length()), context.nil };
ByteList to;

if (b instanceof RubyFixnum) {
long bl = RubyNumeric.fix2long(b);

while (bl < RubyFixnum.MAX) {
argsArr.eltSetOk(1, RubyFixnum.newFixnum(runtime, bl));
argsArr[1] = RubyFixnum.newFixnum(runtime, bl);
to = new ByteList(value.length() + 5);
Sprintf.sprintf(to, "%.*d", argsArr);
Sprintf.sprintf(runtime, to, PSTARD, argsArr);
current = RubyString.newStringNoCopy(runtime, to, USASCIIEncoding.INSTANCE, CR_7BIT);
block.yield(context, current);
bl++;
}

argsArr.eltSetOk(1, RubyFixnum.newFixnum(runtime, bl));
argsArr[1] = RubyFixnum.newFixnum(runtime, bl);
to = new ByteList(value.length() + 5);
Sprintf.sprintf(to, "%.*d", argsArr);
Sprintf.sprintf(runtime, to, PSTARD, argsArr);
current = RubyString.newStringNoCopy(runtime, to, USASCIIEncoding.INSTANCE, CR_7BIT);
}
}
Expand Down
12 changes: 6 additions & 6 deletions core/src/main/java/org/jruby/RubyUncaughtThrowError.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.Visibility;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;

import static org.jruby.util.Sprintf.sprintfUS;

/**
* The Java representation of a Ruby UncaughtThrowError.
Expand Down Expand Up @@ -89,13 +92,10 @@ public IRubyObject initialize(IRubyObject[] args, Block block) {

/**
 * Produces the string form of this error.
 *
 * Returns an empty string when {@code message} is nil, the message's string
 * form unchanged when {@code tag} is null, and otherwise the message used as
 * a format with {@code tag} as its single argument.
 *
 * NOTE(review): this span is rendered from a diff, so the old and new
 * versions of the guards and of the final {@code return} appear together;
 * only one of each pair is expected in the real file.
 *
 * @param context current thread context; supplies the runtime
 * @return the empty, plain, or tag-formatted message string
 */
@Override
public RubyString to_s(ThreadContext context) {
if ( message.isNil() ) {
return RubyString.newEmptyString(context.runtime);
}
if ( tag == null ) return message.asString();
if (message.isNil()) return RubyString.newEmptyString(context.runtime);
if (tag == null) return message.asString();

// The message doubles as a format string; the tag is its only argument.
final RubyString str = message.asString();
return str.op_format(context, RubyArray.newArray(context.runtime, tag));
return sprintfUS(context.runtime, message.asString(), new IRubyObject[] { tag }) ;
}

@Override
Expand Down
4 changes: 3 additions & 1 deletion core/src/main/java/org/jruby/ext/date/RubyDate.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import static org.jruby.RubyRegexp.*;
import static org.jruby.ext.date.DateUtils.*;
import static org.jruby.util.Numeric.*;
import static org.jruby.util.Sprintf.sprintfUS;

/**
* JRuby's <code>Date</code> implementation - 'native' parts.
Expand Down Expand Up @@ -1626,7 +1627,8 @@ public RubyString to_s(ThreadContext context) { // format('%.4d-%02d-%02d', year

/**
 * Formats {@code args} according to the format bytes in {@code fmt}.
 *
 * The format is first wrapped in a light {@link RubyString} and then handed
 * to the string formatter.
 *
 * NOTE(review): this span is rendered from a diff; the {@code op_format}
 * return appears to be the removed line and the {@code sprintfUS} return its
 * replacement -- only one is expected in the real file.
 *
 * @param context current thread context; supplies the runtime
 * @param fmt the format, as raw bytes
 * @param args values substituted into the format
 * @return the formatted string
 */
static RubyString format(ThreadContext context, ByteList fmt, IRubyObject... args) {
final RubyString str = RubyString.newStringLight(context.runtime, fmt);
return str.op_format(context, RubyArray.newArrayNoCopy(context.runtime, args));
// FIXME: This could go more directly at sprintf internally and not wrap the format in a RubyString
return sprintfUS(context.runtime, str, args);
}

@JRubyMethod
Expand Down
8 changes: 8 additions & 0 deletions core/src/main/java/org/jruby/util/CommonByteLists.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ public class CommonByteLists {
public static final ByteList AREF_METHOD = new ByteList(new byte[] {'[', ']'});
public static final ByteList ASET_METHOD = new ByteList(new byte[] {'[', ']', '='});
public static final ByteList BACKTRACE_IN = new ByteList(new byte[] {':', 'i', 'n', ' ', '`'});
public static final ByteList C = new ByteList(new byte[] {'%', 'c' });
public static final ByteList CARET = new ByteList(new byte[] {'^'});
public static final ByteList COLON = new ByteList(new byte[] {':'});
public static final ByteList COLON_COLON = new ByteList(new byte[] {':', ':'});
Expand All @@ -22,6 +23,9 @@ public class CommonByteLists {
public static final ByteList EXCEPTION = new ByteList(new byte[] {'E', 'x', 'c', 'e', 'p', 't', 'i', 'o', 'n'});
public static final ByteList _END_ = new ByteList(new byte[] {'_', 'E', 'N', 'D', '_'});
public static final ByteList EQUAL_TILDE = new ByteList(new byte[] {'=', '~'});
public static final ByteList G17 = new ByteList(new byte[] {'%', '.', '1', '7', 'g'});
public static final ByteList G20 = new ByteList(new byte[] {'%', '#', '.', '2', '0', 'g'});
public static final ByteList E14 = new ByteList(new byte[] {'%', '#', '.', '1', '4', 'e'});
public static final ByteList METHODS = new ByteList(new byte[] {'m', 'e', 't', 'h', 'o', 'd', 's'});
public static final ByteList MINUS = new ByteList(new byte[] {'-'});
public static final ByteList FREEZE_METHOD = new ByteList(new byte[] {'f', 'r', 'e', 'e', 'z', 'e'});
Expand All @@ -39,5 +43,9 @@ public class CommonByteLists {
public static final ByteList UNDERSCORE = new ByteList(new byte[] {'_'});
public static final ByteList USING_METHOD = new ByteList(new byte[] {'u', 's', 'i', 'n', 'g'});
public static final ByteList REFINE_METHOD = new ByteList(new byte[] {'r', 'e', 'f', 'i', 'n', 'e'});
public static final ByteList UX = new ByteList(new byte[] {'\\', 'u', '{', '%', 'X', '}'});
public static final ByteList U4X = new ByteList(new byte[] {'\\', 'u', '%', '0', '4', 'X'});
public static final ByteList X2X = new ByteList(new byte[] {'\\', 'x', '%', '0', '2', 'X'});
public static final ByteList XX = new ByteList(new byte[] {'\\', 'x', '{', '%', 'X', '}'});
public static final ByteList ZERO = new ByteList(new byte[] {'0'});
}