Showing with 513 additions and 269 deletions.
  1. +87 −45 dsplit.d
  2. +426 −224 dustmite.d
132 changes: 87 additions & 45 deletions dsplit.d
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,32 @@ import std.ascii;
import std.array;
debug import std.stdio;

// A node of the entity tree: optional head text, a list of child entities,
// and optional tail text.
// NOTE(review): this span interleaves the pre-change (`struct Entity`,
// `alias head filename`) and post-change (`class Entity`, separate
// `filename`/`contents` fields, constructor) lines of a diff - confirm which
// version is intended before compiling.
struct Entity
class Entity
{
// Text preceding the children.
string head;
// Nested entities, in source order.
Entity[] children;
// Text following the children.
string tail;

// File name and file contents; loadFile assigns both on the entity it creates for a file.
string filename, contents;
// True when a file name has been assigned to this entity.
@property bool isFile() { return filename != ""; }

bool isPair; /// internal hint
bool noRemove; /// don't try removing this entity (children OK)

alias head filename; // for depth 0
bool removed; /// For dangling dependencies
// Entities linked to this one; postProcessD fills this in for the comma entities it creates.
Entity[] dependencies;

int id; /// For diagnostics
int descendants; /// For progress display

// Every parameter has a default, so `new Entity()` produces an empty node.
// Only the five fields below are initialised here; the rest keep their defaults.
this(string head = null, Entity[] children = null, string tail = null, string filename = null, bool isPair = false)
{
this.head = head;
this.children = children;
this.tail = tail;
this.filename = filename;
this.isPair = isPair;
}
}

struct ParseOptions
Expand All @@ -31,36 +47,36 @@ struct ParseOptions
Mode mode;
}

// Loads either a single file or every file below a directory into entities.
// `path` is taken by reference and is overwritten in the single-file case.
// NOTE(review): this span interleaves the old (returns Entity[]) and new
// (returns a single root Entity) lines of a diff.
Entity[] loadFiles(ref string path, ParseOptions options)
Entity loadFiles(ref string path, ParseOptions options)
{
if (isFile(path))
{
auto filePath = path;
// Replace the caller's path with getName(path) unless that is null
// (presumably the path without its extension - confirm against std.path).
path = getName(path) is null ? path : getName(path);
// The entity is named after the file's base name, with backslashes normalised to '/'.
return [Entity(basename(filePath).replace(`\`, `/`), loadFile(filePath, options), null)];
return loadFile(basename(filePath).replace(`\`, `/`), filePath, options);
}
else
{
// Directory case: gather one entity per file found by a breadth-first walk.
Entity[] set;
auto set = new Entity();
foreach (string entry; dirEntries(path, SpanMode.breadth))
if (isFile(entry))
{
assert(entry.startsWith(path));
// Name relative to `path`; the +1 presumably skips the path separator after the prefix.
auto name = entry[path.length+1..$];
set ~= Entity(name.replace(`\`, `/`), loadFile(entry, options), null);
set.children ~= loadFile(name, entry, options);
}
return set;
}
}

// Passed as the binSize argument to clusterBy from optimize's doOptimize helper.
enum BIN_SIZE = 2;

// Recursively regroups the children of every entity, bottom-up, by calling
// clusterBy with BIN_SIZE on each child list.
// NOTE(review): this span interleaves old (operates on `ref Entity[]`) and new
// (operates on an `Entity`) lines of a diff, and the body of clusterBy is
// elided by the hunk separator below.
void optimize(ref Entity[] set)
void optimize(Entity set)
{
// Replaces set[start..end] with one wrapper entity whose children are a copy of that slice.
static void group(ref Entity[] set, size_t start, size_t end)
{
//set = set[0..start] ~ [Entity(removable, set[start..end])] ~ set[end..$];
set.replaceInPlace(start, end, [Entity(null, set[start..end].dup, null)]);
//set = set[0..start] ~ [new Entity(removable, set[start..end])] ~ set[end..$];
set.replaceInPlace(start, end, [new Entity(null, set[start..end].dup, null)]);
}

// NOTE(review): body not visible here; presumably uses group() to bundle
// runs of `binSize` entities - confirm against the full file.
static void clusterBy(ref Entity[] set, size_t binSize)
Expand All @@ -78,26 +94,28 @@ void optimize(ref Entity[] set)
}
}

// Depth-first: descendants are processed before their parent's child list is clustered.
static void doOptimize(ref Entity[] set)
static void doOptimize(Entity e)
{
foreach (ref entity; set)
doOptimize(entity.children);
clusterBy(set, BIN_SIZE);
foreach (c; e.children)
doOptimize(c);
clusterBy(e.children, BIN_SIZE);
}

doOptimize(set);
}

private:

// Reads the file at `path`, optionally strips D comments from it, and splits
// the text into entities according to options.mode.
// NOTE(review): this span interleaves old (returns the child list) and new
// (returns a file Entity carrying filename, contents and children) lines of a
// diff; some lines of the switch are elided by the hunk separator below.
Entity[] loadFile(string path, ParseOptions options)
Entity loadFile(string name, string path, ParseOptions options)
{
debug writeln("Loading ", path);
string contents = cast(string)read(path);
auto result = new Entity();
// Stored name always uses '/' as separator.
result.filename = name.replace(`\`, `/`);
result.contents = cast(string)read(path);

// Comment stripping is applied only to files with a "d" or "di" extension.
if (options.stripComments)
if (getExt(path) == "d" || getExt(path) == "di")
contents = stripDComments(contents);
result.contents = stripDComments(result.contents);

final switch (options.mode)
{
Expand All @@ -106,13 +124,13 @@ Entity[] loadFile(string path, ParseOptions options)
{
// D sources are split with the D-aware parser.
case "d":
case "di":
return parseD(contents);
result.children = parseD(result.contents); return result;
// One could add custom splitters for other languages here - for example, a simple line/word/character splitter for most text-based formats
// Any other extension: the whole file becomes a single entity holding all the text as its head.
default:
return [Entity(contents, null, null)];
result.children = [new Entity(result.contents, null, null)]; return result;
}
// Words mode ignores the extension and splits every file with parseToWords.
case ParseOptions.Mode.Words:
return parseToWords(contents);
result.children = parseToWords(result.contents); return result;
}
}

Expand Down Expand Up @@ -239,9 +257,11 @@ Entity[] parseD(string s)

Entity[] parseScope(char end)
{
enum MAX_SPLITTER_LEVELS = 4;
// Here be dragons.

enum MAX_SPLITTER_LEVELS = 5;
struct DSplitter { char open, close, sep; }
static const DSplitter[MAX_SPLITTER_LEVELS] splitters = [{'{','}',';'}, {'(',')'}, {'[',']'}, {sep:','}];
static const DSplitter[MAX_SPLITTER_LEVELS] splitters = [{'{','}',';'}, {'(',')'}, {'[',']'}, {sep:','}, {sep:' '}];

Entity[][MAX_SPLITTER_LEVELS] splitterQueue;

Expand All @@ -259,7 +279,7 @@ Entity[] parseD(string s)
if (next.length <= 1)
splitterQueue[level] ~= next;
else
splitterQueue[level] ~= Entity(null, next, null);
splitterQueue[level] ~= new Entity(null, next, null);
auto r = splitterQueue[level];
splitterQueue[level] = null;
return r;
Expand All @@ -283,7 +303,7 @@ Entity[] parseD(string s)
auto children = terminateLevel(level+1);
assert(i == start);
i++; skipToEOL(s, i);
splitterQueue[level] ~= Entity(null, children, terminateText());
splitterQueue[level] ~= new Entity(null, children, terminateText());
continue characterLoop;
}
else
Expand All @@ -297,15 +317,15 @@ Entity[] parseD(string s)
auto startSequence = terminateText();
auto bodyContents = parseScope(info.close);

auto pairBody = Entity(startSequence, bodyContents, innerTail);
auto pairBody = new Entity(startSequence, bodyContents, innerTail);

if (pairHead.length == 0)
splitterQueue[level] ~= pairBody;
else
if (pairHead.length == 1)
splitterQueue[level] ~= Entity(null, pairHead ~ pairBody, null, true);
splitterQueue[level] ~= new Entity(null, pairHead ~ pairBody, null, null, true);
else
splitterQueue[level] ~= Entity(null, [Entity(null, pairHead, null), pairBody], null, true);
splitterQueue[level] ~= new Entity(null, [new Entity(null, pairHead, null), pairBody], null, null, true);
continue characterLoop;
}

Expand Down Expand Up @@ -344,12 +364,23 @@ string stripDComments(string s)
return result.data;
}

/// Group together consecutive entities which might represent a single language construct
/// There is no penalty for false positives, so accuracy is not very important
// Walks `entities` in place and recurses into each entity's children.
// NOTE(review): this span interleaves old and new lines of a diff (the old
// `else { ... }` branch and the new `continue;` form are both present), and
// part of the grouping condition is elided by the hunk separator below.
void postProcessD(ref Entity[] entities)
{
// `i` is advanced only when no grouping happened at this position.
for (int i=0; i<entities.length;)
{
// Add dependencies for comma-separated lists.

// Applies when a next sibling exists, this entity has children, and its tail is just a comma.
if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == ",")
{
// Move the comma out of the tail into its own child entity.
auto comma = new Entity(entities[i].tail);
entities[i].children ~= comma;
entities[i].tail = null;
// children[$-2] is the child that preceded the comma just appended.
// NOTE(review): getHeadEntity returns null when it finds no text, so a
// null may be appended to dependencies here - confirm consumers handle it.
comma.dependencies ~= [entities[i].children[$-2], getHeadEntity(entities[i+1])];
}

// Group together consecutive entities which might represent a single language construct
// There is no penalty for false positives, so accuracy is not very important

// Pairs are recognised by the leading words of the two neighbouring entities.
if (i+2 <= entities.length && entities.length > 2 && (
(getHeadText(entities[i]).startsWithWord("do") && getHeadText(entities[i+1]).isWord("while"))
|| (getHeadText(entities[i]).startsWithWord("try") && getHeadText(entities[i+1]).startsWithWord("catch"))
Expand All @@ -358,12 +389,13 @@ void postProcessD(ref Entity[] entities)
|| (getHeadText(entities[i+1]).isWord("out"))
|| (getHeadText(entities[i+1]).isWord("body"))
))
// Replace the pair with one wrapper entity; `i` stays put, so the wrapper is
// checked again against its new successor.
entities.replaceInPlace(i, i+2, [Entity(null, entities[i..i+2].dup, null)]);
else
{
postProcessD(entities[i].children);
i++;
}
entities.replaceInPlace(i, i+2, [new Entity(null, entities[i..i+2].dup, null)]);
continue;
}

// No grouping at this position: descend into the children and move on.
postProcessD(entities[i].children);
i++;
}
}

Expand All @@ -380,12 +412,12 @@ Entity[] splitText(string s)
{
size_t p = word.ptr + word.length - s.ptr;
skipToEOL(s, p);
result ~= Entity(s[0..p], null, null);
result ~= new Entity(s[0..p], null, null);
s = s[p..$];
}
else
{
result ~= Entity(s, null, null);
result ~= new Entity(s, null, null);
s = null;
}
}
Expand Down Expand Up @@ -446,21 +478,31 @@ bool startsWithComment(string s)
return s.startsWith("//") || s.startsWith("/*") || s.startsWith("/+");
}

// Finds the first text-bearing entity in e's subtree: e itself if it has a
// head, otherwise the first hit among its children (searched depth-first, in
// order), otherwise e itself if it has a tail. Returns null when nothing is found.
// NOTE(review): this span interleaves the old `getHeadText` (returns the text,
// tests strings for non-null) and the new `getHeadEntity` (returns the entity,
// tests `.length`) lines of a diff.
string getHeadText(in Entity e)
Entity getHeadEntity(Entity e)
{
if (e.head)
return e.head;
foreach (ref child; e.children)
if (e.head.length)
return e;
foreach (child; e.children)
{
string s = getHeadText(child);
if (s)
return s;
Entity r = getHeadEntity(child);
if (r)
return r;
}
// The tail is consulted only after the head and every child came up empty.
if (e.tail)
return e.tail;
if (e.tail.length)
return e;
return null;
}

/// Returns the first non-empty text found in e's subtree, or null if there is none.
///
/// The entity carrying that text is located with getHeadEntity; the result is
/// that entity's head when it is non-empty, and its tail otherwise.
string getHeadText(Entity e)
{
	e = getHeadEntity(e);
	if (!e)
		return null;
	// getHeadEntity selects entities by `.length`, so test the length here too.
	// A bare `if (e.head)` only checks for a non-null pointer and would return
	// an empty (but non-null) head instead of the non-empty tail that caused
	// the entity to be selected.
	if (e.head.length)
		return e.head;
	return e.tail;
}

// ParseOptions.Mode.Words

bool isDWordChar(char c)
Expand All @@ -476,7 +518,7 @@ public Entity[] parseToWords(string text)
if (i==text.length || (!isDWordChar(text[i-1]) && isDWordChar(text[i])))
{
if (wordStart != i)
result ~= Entity(text[wordStart..wordEnd], null, text[wordEnd..i]);
result ~= new Entity(text[wordStart..wordEnd], null, text[wordEnd..i]);
wordStart = wordEnd = i;
}
else
Expand Down
Loading