Skip to content

Commit

Permalink
3x faster byLine implementation on OSX, probably faster on Linux as well
Browse files Browse the repository at this point in the history
  • Loading branch information
andralex committed Mar 22, 2015
1 parent f8e1157 commit 541e593
Showing 1 changed file with 110 additions and 84 deletions.
194 changes: 110 additions & 84 deletions std/stdio.d
Expand Up @@ -44,26 +44,31 @@ version (linux)
{
// Specific to the way Gnu C does stdio
version = GCC_IO;
version = HAS_GETDELIM;
}

// OSX: uses the generic C stdio path, and additionally sets HAS_GETDELIM,
// which enables the extern(C) getdelim()/getline() declarations and the
// getdelim()-based readlnImpl further down in this file.
version (OSX)
{
version = GENERIC_IO;
version = HAS_GETDELIM;
}

// FreeBSD: uses the generic C stdio path. NO_GETDELIM selects the readlnImpl
// variant guarded by version (NO_GETDELIM) instead of the getdelim()-based one.
version (FreeBSD)
{
version = GENERIC_IO;
version = NO_GETDELIM;
}

// Solaris: uses the generic C stdio path. NO_GETDELIM selects the readlnImpl
// variant guarded by version (NO_GETDELIM) instead of the getdelim()-based one.
version (Solaris)
{
version = GENERIC_IO;
version = NO_GETDELIM;
}

// Android: uses the generic C stdio path. NO_GETDELIM selects the readlnImpl
// variant guarded by version (NO_GETDELIM) instead of the getdelim()-based one.
version (Android)
{
version = GENERIC_IO;
version = NO_GETDELIM;
}

version(Windows)
Expand Down Expand Up @@ -161,8 +166,6 @@ else version (GCC_IO)
int fgetwc_unlocked(_iobuf*);
void flockfile(FILE*);
void funlockfile(FILE*);
ptrdiff_t getline(char**, size_t*, FILE*);
ptrdiff_t getdelim (char**, size_t*, int, FILE*);

private size_t fwrite_unlocked(const(void)* ptr,
size_t size, size_t n, _iobuf *stream);
Expand Down Expand Up @@ -213,6 +216,13 @@ else
static assert(0, "unsupported C I/O system");
}

// C library line-reading routines, declared once for every platform that
// sets HAS_GETDELIM (rather than per C I/O flavour). They are used by the
// HAS_GETDELIM implementation of readlnImpl.
version(HAS_GETDELIM) extern(C) nothrow @nogc
{
// Reads up to and including the given delimiter into a buffer that the
// callee may (re)allocate through the char** / size_t* pair.
ptrdiff_t getdelim(char**, size_t*, int, FILE*);
// getline() always comes together with getdelim()
ptrdiff_t getline(char**, size_t*, FILE*);
}

//------------------------------------------------------------------------------
struct ByRecord(Fields...)
{
Expand Down Expand Up @@ -327,12 +337,14 @@ struct File
{
import std.traits : isScalarType, isArray;
import std.range.primitives : ElementEncodingType;
enum Orientation { unknown, narrow, wide }

// Shared state behind a File; File holds it through the pointer _p.
private struct Impl
{
FILE * handle = null; // Is null iff this Impl is closed by another File
uint refs = uint.max / 2;
bool isPopened; // true iff the stream has been created by popen()
// Cached stream orientation. Starts as Orientation.unknown and is filled
// in by readln() from fwide() once the stream reports narrow or wide, so
// that readlnImpl receives it as an argument instead of querying fwide().
Orientation orientation;
}
private Impl* _p;
private string _name;
Expand All @@ -347,6 +359,7 @@ struct File
_p.handle = handle;
_p.refs = refs;
_p.isPopened = isPopened;
_p.orientation = Orientation.unknown;
_name = name;
}

Expand Down Expand Up @@ -1410,7 +1423,14 @@ for every line.
static if (is(C == char))
{
enforce(_p && _p.handle, "Attempt to read from an unopened file.");
return readlnImpl(_p.handle, buf, terminator);
if (_p.orientation == Orientation.unknown)
{
import core.stdc.wchar_ : fwide;
auto w = fwide(_p.handle, 0);
if (w < 0) _p.orientation = Orientation.narrow;
else if (w > 0) _p.orientation = Orientation.wide;
}
return readlnImpl(_p.handle, buf, terminator, _p.orientation);
}
else
{
Expand Down Expand Up @@ -1589,13 +1609,12 @@ Allows to directly use range operations on lines of a file.
private:
import std.typecons;

/* Ref-counting stops the source range's ByLineImpl
/* Ref-counting stops the source range's Impl
* from getting out of sync after the range is copied, e.g.
* when accessing range.front, then using std.range.take,
* then accessing range.front again. */
alias Impl = RefCounted!(ByLineImpl!(Char, Terminator),
RefCountedAutoInitialize.no);
Impl impl;
alias PImpl = RefCounted!(Impl, RefCountedAutoInitialize.no);
PImpl impl;

static if (isScalarType!Terminator)
enum defTerm = '\n';
Expand All @@ -1606,7 +1625,7 @@ Allows to directly use range operations on lines of a file.
this(File f, KeepTerminator kt = KeepTerminator.no,
Terminator terminator = defTerm)
{
impl = Impl(f, kt, terminator);
impl = PImpl(f, kt, terminator);
}

@property bool empty()
Expand All @@ -1623,78 +1642,61 @@ Allows to directly use range operations on lines of a file.
{
impl.refCountedPayload.popFront();
}
}

private struct ByLineImpl(Char, Terminator)
{
private:
File file;
Char[] line;
Terminator terminator;
KeepTerminator keepTerminator;

public:
this(File f, KeepTerminator kt, Terminator terminator)
{
file = f;
this.terminator = terminator;
keepTerminator = kt;
popFront();
}

// Range primitive implementations.
@property bool empty()
struct Impl
{
if (line !is null) return false;
if (!file.isOpen) return true;

// First read ever, must make sure stream is not empty. We
// do so by reading a character and putting it back. Doing
// so is guaranteed to work on all files opened in all
// buffering modes.
auto fp = file.getFP();
auto c = fgetc(fp);
if (c == -1)
private:
File file;
Char[] line;
Terminator terminator;
KeepTerminator keepTerminator;

public:
this(File f, KeepTerminator kt, Terminator terminator)
{
file.detach();
return true;
file = f;
this.terminator = terminator;
keepTerminator = kt;
popFront();
}
ungetc(c, fp) == c
|| assert(false, "Bug in cstdlib implementation");
return false;
}

@property Char[] front()
{
return line;
}

void popFront()
{
import std.algorithm : endsWith;
// Range primitive implementations.
@property bool empty()
{
return line is null;
}

assert(file.isOpen);
assumeSafeAppend(line);
file.readln(line, terminator);
if (line.empty)
@property Char[] front()
{
file.detach();
line = null;
return line;
}
else if (keepTerminator == KeepTerminator.no
&& std.algorithm.endsWith(line, terminator))

void popFront()
{
static if (isScalarType!Terminator)
enum tlen = 1;
else static if (isArray!Terminator)
import std.algorithm : endsWith;
assert(file.isOpen);
file.readln(line, terminator);
if (line.empty)
{
static assert(
is(Unqual!(ElementEncodingType!Terminator) == Char));
const tlen = terminator.length;
file.detach();
line = null;
}
else if (keepTerminator == KeepTerminator.no
&& std.algorithm.endsWith(line, terminator))
{
static if (isScalarType!Terminator)
enum tlen = 1;
else static if (isArray!Terminator)
{
static assert(
is(Unqual!(ElementEncodingType!Terminator) == Char));
const tlen = terminator.length;
}
else
static assert(false);
line = line.ptr[0 .. line.length - tlen];
}
else
static assert(false);
line = line.ptr[0 .. line.length - tlen];
}
}
}
Expand Down Expand Up @@ -1775,6 +1777,22 @@ the contents may well have changed).
return ByLine!(Char, Terminator)(this, keepTerminator, terminator);
}

// Checks that byLine over a one-line file without a trailing terminator
// yields that line for every character width, and that front is cached.
unittest
{
    import std.typetuple;

    auto path = testFilename();
    std.file.write(path, "hi");
    // Only clean up when the test passes.
    scope(success) std.file.remove(path);

    foreach (Ch; TypeTuple!(char, wchar, dchar))
    {
        auto lines = File(path).byLine!(Ch, Ch);
        assert(lines.front == "hi");
        // Two consecutive calls to front must hand back the very same
        // slice (identity, not just equality), i.e. no re-read happens.
        assert(lines.front is lines.front);
    }
}

private struct ByLineCopy(Char, Terminator)
{
private:
Expand Down Expand Up @@ -1812,14 +1830,14 @@ the contents may well have changed).

private struct ByLineCopyImpl(Char, Terminator)
{
ByLineImpl!(Unqual!Char, Terminator) impl;
ByLine!(Unqual!Char, Terminator).Impl impl;
bool gotFront;
Char[] line;

public:
this(File f, KeepTerminator kt, Terminator terminator)
{
impl = ByLineImpl!(Unqual!Char, Terminator)(f, kt, terminator);
impl = ByLine!(Unqual!Char, Terminator).Impl(f, kt, terminator);
}

@property bool empty()
Expand Down Expand Up @@ -3863,7 +3881,7 @@ unittest

// Private implementation of readln
version (DIGITAL_MARS_STDIO)
private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator = '\n')
private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator, File.Orientation /*ignored*/)
{
import core.memory;
import core.stdc.string : memcpy;
Expand Down Expand Up @@ -4008,7 +4026,7 @@ private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator = '\n')
}

version (MICROSOFT_STDIO)
private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator = '\n')
private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator, File.Orientation /*ignored*/)
{
import core.memory;
import std.array : appender, uninitializedArray;
Expand Down Expand Up @@ -4046,15 +4064,15 @@ private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator = '\n')
return buf.length;
}

version (GCC_IO)
private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator = '\n')
version (HAS_GETDELIM)
private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator, File.Orientation orientation)
{
import core.memory;
import core.stdc.stdlib : free;
import core.stdc.wchar_ : fwide;
import std.utf : encode;

if (fwide(fps, 0) > 0)
if (orientation == File.Orientation.wide)
{
/* Stream is in wide characters.
* Read them and convert to chars.
Expand Down Expand Up @@ -4113,21 +4131,29 @@ private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator = '\n')
}
}

char *lineptr = null;
size_t n = 0;
static char *lineptr = null;
static size_t n = 0;
// On exit, release the getdelim() scratch buffer if it has grown large.
// lineptr and n are static, so they persist across calls to readlnImpl;
// normally the buffer is kept and reused to avoid a malloc/free per line.
scope(exit)
{
    if (n > 128 * 1024)
    {
        // Bound memory used by readln
        free(lineptr);
        // Reset BOTH the pointer and the capacity. Leaving lineptr
        // pointing at the freed block would hand a dangling pointer to
        // the next getdelim() call, which would realloc()/write through
        // it (use-after-free / double free).
        lineptr = null;
        n = 0;
    }
}

auto s = getdelim(&lineptr, &n, terminator, fps);
scope(exit) free(lineptr);
if (s < 0)
{
if (ferror(fps))
StdioException();
buf.length = 0; // end of file
return 0;
}
buf = buf.ptr[0 .. GC.sizeOf(buf.ptr)];
if (s <= buf.length)
if (s <= GC.sizeOf(buf.ptr))
{
buf.length = s;
buf = buf.ptr[0 .. s];
buf[] = lineptr[0 .. s];
}
else
Expand All @@ -4137,16 +4163,16 @@ private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator = '\n')
return s;
}

version (GENERIC_IO)
private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator = '\n')
version (NO_GETDELIM)
private size_t readlnImpl(FILE* fps, ref char[] buf, dchar terminator, File.Orientation orientation)
{
import core.stdc.wchar_ : fwide;
import std.utf : encode;

FLOCK(fps);
scope(exit) FUNLOCK(fps);
auto fp = cast(_iobuf*)fps;
if (fwide(fps, 0) > 0)
if (orientation == File.Orientation.wide)
{
/* Stream is in wide characters.
* Read them and convert to chars.
Expand Down

0 comments on commit 541e593

Please sign in to comment.