Skip to content

Commit

Permalink
improve memoize with size limit
Browse files Browse the repository at this point in the history
- previous implementation was dropping complete
  hash-table whenever max size was reached

- new implementation simply replaces old values
  in a fixed size hash-table when hashes collide

- use cuckoo hashing for better space usage and to
  reduce collisions of frequently used values
  • Loading branch information
MartinNowak committed Nov 4, 2014
1 parent 3bb4bb2 commit c12e66a
Showing 1 changed file with 109 additions and 57 deletions.
166 changes: 109 additions & 57 deletions std/functional.d
Expand Up @@ -420,7 +420,7 @@ unittest
}

/**
$(LINK2 http://en.wikipedia.org/wiki/Partial_application, Partially
$(LINK2 http://en.wikipedia.org/wiki/Partial_application, Partially
applies) $(D_PARAM fun) by tying its first argument to $(D_PARAM arg).
Example:
Expand Down Expand Up @@ -801,76 +801,124 @@ Technically the memoized function should be pure because $(D memoize) assumes it
will always return the same result for a given tuple of arguments. However, $(D memoize) does not
enforce that because sometimes it
is useful to memoize an impure function, too.
*/
template memoize(alias fun)
{
// alias Args = ParameterTypeTuple!fun; // Bugzilla 13580

To _memoize a recursive function, simply insert the memoized call in lieu of the plain recursive call.
For example, to transform the exponential-time Fibonacci implementation into a linear-time computation:
ReturnType!fun memoize(ParameterTypeTuple!fun args)
{
alias Args = ParameterTypeTuple!fun;
import std.typecons : Tuple;

Example:
----
ulong fib(ulong n)
{
alias mfib = memoize!fib;
return n < 2 ? 1 : mfib(n - 2) + mfib(n - 1);
static ReturnType!fun[Tuple!Args] memo;
auto t = Tuple!Args(args);
if (auto p = t in memo)
return *p;
return memo[t] = fun(args);
}
}
...
assert(fib(10) == 89);
----

To improve the speed of the factorial function,
Example:
----
ulong fact(ulong n)
/// ditto
template memoize(alias fun, uint maxSize)
{
alias mfact = memoize!fact;
return n < 2 ? 1 : n * mfact(n - 1);
// alias Args = ParameterTypeTuple!fun; // Bugzilla 13580
// Size-limited memoization of `fun`: results are cached in a fixed table of
// `maxSize` slots. Every argument tuple hashes to two candidate slots
// (cuckoo-style); on a collision the entry in the first slot is moved to the
// second one and replaced, so the table never grows.
//
// Params:
//   args = the arguments to forward to `fun`; they also form the cache key
// Returns: the cached result for `args`, or the freshly computed `fun(args)`.
ReturnType!fun memoize(ParameterTypeTuple!fun args)
{
    static struct Value { ParameterTypeTuple!fun args; ReturnType!fun res; }
    static Value[] memo;          // slot storage, allocated on the first call
    static size_t[] initialized;  // bitmap: bit i is set once memo[i] has been constructed

    if (!memo.length)
    {
        import core.memory : GC;

        // Reject table sizes whose byte counts would overflow size_t.
        static assert(maxSize < size_t.max / Value.sizeof);
        static assert(maxSize < size_t.max - (8 * size_t.sizeof - 1));

        enum attr = GC.BlkAttr.NO_INTERIOR | (hasIndirections!Value ? 0 : GC.BlkAttr.NO_SCAN);
        // Pass `attr` to the slot allocation as well; it was previously
        // computed but only applied to the bitmap below.
        memo = (cast(Value*)GC.malloc(Value.sizeof * maxSize, attr))[0 .. maxSize];
        enum nwords = (maxSize + 8 * size_t.sizeof - 1) / (8 * size_t.sizeof);
        initialized = (cast(size_t*)GC.calloc(nwords * size_t.sizeof, attr | GC.BlkAttr.NO_SCAN))[0 .. nwords];
    }

    import core.bitop : bt, bts;
    import std.conv : emplace;

    size_t hash;
    foreach (ref arg; args)
        hash = hashOf(arg, hash);

    // cuckoo hashing: first candidate slot
    immutable idx1 = hash % maxSize;
    if (!bt(initialized.ptr, idx1))
    {
        emplace(&memo[idx1], args, fun(args));
        // Mark the slot only after it has been fully constructed. `fun` may
        // re-enter memoize (recursive use) or throw; flagging the slot first
        // would let a nested call compare against unconstructed memory.
        bts(initialized.ptr, idx1);
        return memo[idx1].res;
    }
    else if (memo[idx1].args == args)
        return memo[idx1].res;

    // second candidate slot, derived with the FNV prime
    immutable idx2 = (hash * 16777619) % maxSize;
    if (!bt(initialized.ptr, idx2))
    {
        // Keep the displaced entry by moving it into the free second slot.
        emplace(&memo[idx2], memo[idx1]);
        bts(initialized.ptr, idx2);
    }
    else if (memo[idx2].args == args)
        return memo[idx2].res;
    else if (idx1 != idx2)
        memo[idx2] = memo[idx1];  // evict whatever occupied the second slot

    // Miss in both slots: the new entry always takes the first slot.
    memo[idx1] = Value(args, fun(args));
    return memo[idx1].res;
}
}
...
assert(fact(10) == 3628800);
----

This memoizes all values of $(D fact) up to the largest argument. To only cache the final
result, move $(D memoize) outside the function as shown below.
/**
* To _memoize a recursive function, simply insert the memoized call in lieu of the plain recursive call.
* For example, to transform the exponential-time Fibonacci implementation into a linear-time computation:
*/
unittest
{
    // The recursive calls go through memoize, so an argument that has
    // already been evaluated is answered from the cache.
    ulong fib(ulong n)
    {
        if (n < 2)
            return 1;
        return memoize!fib(n - 2) + memoize!fib(n - 1);
    }

    immutable tenth = fib(10);
    assert(tenth == 89);
}

Example:
----
ulong factImpl(ulong n)
/**
* To improve the speed of the factorial function,
*/
unittest
{
return n < 2 ? 1 : n * factImpl(n - 1);
ulong fact(ulong n)
{
return n < 2 ? 1 : n * memoize!fact(n - 1);
}
assert(fact(10) == 3628800);
}
alias fact = memoize!factImpl;
...
assert(fact(10) == 3628800);
----

The $(D maxSize) parameter is a cutoff for the cache size. If upon a miss the length of the hash
table is found to be $(D maxSize), the table is simply cleared.
/**
* This memoizes all values of $(D fact) up to the largest argument. To only cache the final
* result, move $(D memoize) outside the function as shown below.
*/
unittest
{
    // Plain recursion: the intermediate factorials are not routed through
    // memoize.
    ulong factImpl(ulong n)
    {
        if (n < 2)
            return 1;
        return n * factImpl(n - 1);
    }

    // Only calls made through the alias are looked up in the cache.
    alias fact = memoize!factImpl;

    immutable result = fact(10);
    assert(result == 3628800);
}

Example:
----
// Memoize no more than 128 values of transmogrify
alias fastTransmogrify = memoize!(transmogrify, 128);
----
*/
template memoize(alias fun, uint maxSize = uint.max)
/**
* When the $(D maxSize) parameter is specified, memoize will use
* a fixed size hash table to limit the number of cached entries.
*/
unittest
{
private alias Args = ParameterTypeTuple!fun;
ReturnType!fun memoize(Args args)
ulong fact(ulong n)
{
import std.typecons : Tuple, tuple;
static ReturnType!fun[Tuple!Args] memo;
auto t = Tuple!Args(args);
auto p = t in memo;
if (p) return *p;
static if (maxSize != uint.max)
{
if (memo.length >= maxSize) memo = null;
}
auto r = fun(args);
//writeln("Inserting result ", typeof(r).stringof, "(", r, ") for parameters ", t);
memo[t] = r;
return r;
// Memoize no more than 8 values
return n < 2 ? 1 : n * memoize!(fact, 8)(n - 1);
}
assert(fact(8) == 40320);
// using more entries than maxSize will overwrite existing entries
assert(fact(10) == 3628800);
}

unittest
Expand Down Expand Up @@ -912,6 +960,11 @@ unittest
else
return 1 + mLen2(s[1 .. $]);
}

int _func(int x) { return 1; }
alias func = memoize!(_func, 10);
assert(func(int.init) == 1);
assert(func(int.init) == 1);
}

private struct DelegateFaker(F)
Expand Down Expand Up @@ -1126,4 +1179,3 @@ unittest {
static assert(! is(typeof(dg_xtrnC) == typeof(dg_xtrnD)));
}
}

0 comments on commit c12e66a

Please sign in to comment.