diff --git a/qa/rpc-tests/test_framework/keccak.py b/qa/rpc-tests/test_framework/keccak.py
new file mode 100644
index 0000000000000..4e696c0a4b934
--- /dev/null
+++ b/qa/rpc-tests/test_framework/keccak.py
@@ -0,0 +1,365 @@
+# Keccak implementation.
+# ==========================(LICENSE BEGIN)============================
+#
+# Copyright (c) 2007-2010  Projet RNRT SAPHIR
+# Copyright (c) 2017 The Bitcoin Core developers
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# ===========================(LICENSE END)=============================
+#
+# @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+# (Ported to Python by Luke Dashjr)
+#
+
+import struct
+
+def SPH_T64(x):  # Truncate x to its low 64 bits.
+    return x & ((1 << 64) - 1)
+
+def SPH_ROTL64(x, n):  # Rotate the 64-bit value x left by n bits.
+    return SPH_T64((x >> (64 - n)) | (x << n))
+
+RC = (  # 24 constants; KECCAK_F_1600 hands RC[j] to round j, which XORs it into lane 0
+        0x0000000000000001, 0x0000000000008082,
+        0x800000000000808A, 0x8000000080008000,
+        0x000000000000808B, 0x0000000080000001,
+        0x8000000080008081, 0x8000000000008009,
+        0x000000000000008A, 0x0000000000000088,
+        0x0000000080008009, 0x000000008000000A,
+        0x000000008000808B, 0x800000000000008B,
+        0x8000000000008089, 0x8000000000008003,
+        0x8000000000008002, 0x8000000000000080,
+        0x000000000000800A, 0x800000008000000A,
+        0x8000000080008081, 0x8000000000008080,
+        0x0000000080000001, 0x8000000080008008
+)
+
+def TH_ELT(c0, c1, c2, c3, c4, d0, d1, d2, d3, d4):
+    # Theta helper. XORs together the five "d" lanes, rotates that parity
+    # left by one bit, and combines it with the XOR of the five "c" lanes.
+    # XOR is associative and commutative, so the grouping used here gives
+    # the same value as accumulating the lanes one temporary at a time.
+    d_parity = d0
+    for lane in (d1, d2, d3, d4):
+        d_parity ^= lane
+    c_parity = c0 ^ c1 ^ c2 ^ c3 ^ c4
+    rotated = SPH_ROTL64(d_parity, 1)
+    return rotated ^ c_parity
+
+def THETA(b):  # Theta step, in place: b is indexed by lane labels 0-4, 10-14, ..., 40-44.
+    t0 = TH_ELT(b[40], b[41], b[42], b[43], b[44], b[10], b[11], b[12], b[13], b[14])  # all five t values are computed before any lane is modified
+    t1 = TH_ELT(b[0], b[1], b[2], b[3], b[4], b[20], b[21], b[22], b[23], b[24])
+    t2 = TH_ELT(b[10], b[11], b[12], b[13], b[14], b[30], b[31], b[32], b[33], b[34])
+    t3 = TH_ELT(b[20], b[21], b[22], b[23], b[24], b[40], b[41], b[42], b[43], b[44])
+    t4 = TH_ELT(b[30], b[31], b[32], b[33], b[34], b[0], b[1], b[2], b[3], b[4])
+    b[0] = b[0] ^ t0  # each t value is XORed into one group of five lanes
+    b[1] = b[1] ^ t0
+    b[2] = b[2] ^ t0
+    b[3] = b[3] ^ t0
+    b[4] = b[4] ^ t0
+    b[10] = b[10] ^ t1
+    b[11] = b[11] ^ t1
+    b[12] = b[12] ^ t1
+    b[13] = b[13] ^ t1
+    b[14] = b[14] ^ t1
+    b[20] = b[20] ^ t2
+    b[21] = b[21] ^ t2
+    b[22] = b[22] ^ t2
+    b[23] = b[23] ^ t2
+    b[24] = b[24] ^ t2
+    b[30] = b[30] ^ t3
+    b[31] = b[31] ^ t3
+    b[32] = b[32] ^ t3
+    b[33] = b[33] ^ t3
+    b[34] = b[34] ^ t3
+    b[40] = b[40] ^ t4
+    b[41] = b[41] ^ t4
+    b[42] = b[42] ^ t4
+    b[43] = b[43] ^ t4
+    b[44] = b[44] ^ t4
+
+def RHO(b):  # Rho step, in place: rotate each labelled lane left by its fixed amount.
+    # b[0] = SPH_ROTL64(b[0],  0)  (rotation by zero: lane 0 is left untouched)
+    b[1] = SPH_ROTL64(b[1], 36)
+    b[2] = SPH_ROTL64(b[2],  3)
+    b[3] = SPH_ROTL64(b[3], 41)
+    b[4] = SPH_ROTL64(b[4], 18)
+    b[10] = SPH_ROTL64(b[10],  1)
+    b[11] = SPH_ROTL64(b[11], 44)
+    b[12] = SPH_ROTL64(b[12], 10)
+    b[13] = SPH_ROTL64(b[13], 45)
+    b[14] = SPH_ROTL64(b[14],  2)
+    b[20] = SPH_ROTL64(b[20], 62)
+    b[21] = SPH_ROTL64(b[21],  6)
+    b[22] = SPH_ROTL64(b[22], 43)
+    b[23] = SPH_ROTL64(b[23], 15)
+    b[24] = SPH_ROTL64(b[24], 61)
+    b[30] = SPH_ROTL64(b[30], 28)
+    b[31] = SPH_ROTL64(b[31], 55)
+    b[32] = SPH_ROTL64(b[32], 25)
+    b[33] = SPH_ROTL64(b[33], 21)
+    b[34] = SPH_ROTL64(b[34], 56)
+    b[40] = SPH_ROTL64(b[40], 27)
+    b[41] = SPH_ROTL64(b[41], 20)
+    b[42] = SPH_ROTL64(b[42], 39)
+    b[43] = SPH_ROTL64(b[43],  8)
+    b[44] = SPH_ROTL64(b[44], 14)
+
+#
+# The KHI function integrates the "lane complement" optimization. On input,
+# some words are complemented:
+#    self.wide[ 0] self.wide[ 5] self.wide[10] self.wide[20] self.wide[16] self.wide[ 2] self.wide[ 7] self.wide[12] self.wide[ 3] self.wide[18] self.wide[23] self.wide[19]
+# On output, the following words are complemented:
+#    self.wide[20] self.wide[ 1] self.wide[ 2] self.wide[12] self.wide[17] self.wide[ 8]
+#
+# The (implicit) permutation and the theta expansion will bring back
+# the input mask for the next round.
+#
+
+def KHI_XO(a, b, c):
+    # OR flavour of the chi combination: a XOR (b OR c).
+    return a ^ (b | c)
+
+def KHI_XA(a, b, c):
+    # AND flavour of the chi combination: a XOR (b AND c).
+    return a ^ (b & c)
+
+def KHI(b):  # Chi step, in place, one group of five labelled lanes at a time (x0, x1, x2, x3, x4 for x in 0..4).
+    bnn = SPH_T64(~b[20])  # 64-bit complement of one lane; each group complements a different lane
+    c0 = KHI_XO(b[0], b[10], b[20])  # all five outputs are computed from the old lanes before any is stored
+    c1 = KHI_XO(b[10], bnn, b[30])
+    c2 = KHI_XA(b[20], b[30], b[40])
+    c3 = KHI_XO(b[30], b[40], b[0])
+    c4 = KHI_XA(b[40], b[0], b[10])
+    b[0] = c0
+    b[10] = c1
+    b[20] = c2
+    b[30] = c3
+    b[40] = c4
+    bnn = SPH_T64(~b[41])
+    c0 = KHI_XO(b[1], b[11], b[21])
+    c1 = KHI_XA(b[11], b[21], b[31])
+    c2 = KHI_XO(b[21], b[31], bnn)
+    c3 = KHI_XO(b[31], b[41], b[1])
+    c4 = KHI_XA(b[41], b[1], b[11])
+    b[1] = c0
+    b[11] = c1
+    b[21] = c2
+    b[31] = c3
+    b[41] = c4
+    bnn = SPH_T64(~b[32])
+    c0 = KHI_XO(b[2], b[12], b[22])
+    c1 = KHI_XA(b[12], b[22], b[32])
+    c2 = KHI_XA(b[22], bnn, b[42])
+    c3 = KHI_XO(bnn, b[42], b[2])
+    c4 = KHI_XA(b[42], b[2], b[12])
+    b[2] = c0
+    b[12] = c1
+    b[22] = c2
+    b[32] = c3
+    b[42] = c4
+    bnn = SPH_T64(~b[33])
+    c0 = KHI_XA(b[3], b[13], b[23])
+    c1 = KHI_XO(b[13], b[23], b[33])
+    c2 = KHI_XO(b[23], bnn, b[43])
+    c3 = KHI_XA(bnn, b[43], b[3])
+    c4 = KHI_XO(b[43], b[3], b[13])
+    b[3] = c0
+    b[13] = c1
+    b[23] = c2
+    b[33] = c3
+    b[43] = c4
+    bnn = SPH_T64(~b[14])
+    c0 = KHI_XA(b[4], bnn, b[24])
+    c1 = KHI_XO(bnn, b[24], b[34])
+    c2 = KHI_XA(b[24], b[34], b[44])
+    c3 = KHI_XO(b[34], b[44], b[4])
+    c4 = KHI_XA(b[44], b[4], b[14])
+    b[4] = c0
+    b[14] = c1
+    b[24] = c2
+    b[34] = c3
+    b[44] = c4
+
+
+class P:  # View of a lane list addressed by lane label through a permutation tuple.
+    seqmap = {}  # lane label -> position inside a permutation tuple; shared by every view
+
+    def __init__(self, arr, seq):
+        self.arr, self.seq = arr, seq
+    # Read the lane that label n refers to.
+    def __getitem__(self, n):
+        slot = self.seq[self.seqmap[n]]
+        return self.arr[slot]
+    # Overwrite the lane that label n refers to.
+    def __setitem__(self, n, val):
+        self.arr[self.seq[self.seqmap[n]]] = val
+
+def _setup_P_seqmap():
+    # Number the 25 lane labels 0..24 in the order listed and store that in P.seqmap.
+    labels = (0, 1, 2, 3, 4, 10, 11, 12, 13, 14, 20, 21, 22, 23, 24, 30, 31, 32, 33, 34, 40, 41, 42, 43, 44)
+    for slot, label in enumerate(labels):
+        P.seqmap[label] = slot
+_setup_P_seqmap()
+
+Pn  = (  # 24 tuples of 25 indices into the lane list; KECCAK_F_1600 in this file only ever uses Pn[0] and Pn[1]
+    (0, 5, 10, 15, 20, 1, 6, 11, 16, 21, 2, 7, 12, 17, 22, 3, 8, 13, 18, 23, 4, 9, 14, 19, 24),
+    (0, 3, 1, 4, 2, 6, 9, 7, 5, 8, 12, 10, 13, 11, 14, 18, 16, 19, 17, 15, 24, 22, 20, 23, 21),
+    (0, 18, 6, 24, 12, 9, 22, 10, 3, 16, 13, 1, 19, 7, 20, 17, 5, 23, 11, 4, 21, 14, 2, 15, 8),
+    (0, 17, 9, 21, 13, 22, 14, 1, 18, 5, 19, 6, 23, 10, 2, 11, 3, 15, 7, 24, 8, 20, 12, 4, 16),
+    (0, 11, 22, 8, 19, 14, 20, 6, 17, 3, 23, 9, 15, 1, 12, 7, 18, 4, 10, 21, 16, 2, 13, 24, 5),
+    (0, 7, 14, 16, 23, 20, 2, 9, 11, 18, 15, 22, 4, 6, 13, 10, 17, 24, 1, 8, 5, 12, 19, 21, 3),
+    (0, 10, 20, 5, 15, 2, 12, 22, 7, 17, 4, 14, 24, 9, 19, 1, 11, 21, 6, 16, 3, 13, 23, 8, 18),
+    (0, 1, 2, 3, 4, 12, 13, 14, 10, 11, 24, 20, 21, 22, 23, 6, 7, 8, 9, 5, 18, 19, 15, 16, 17),
+    (0, 6, 12, 18, 24, 13, 19, 20, 1, 7, 21, 2, 8, 14, 15, 9, 10, 16, 22, 3, 17, 23, 4, 5, 11),
+    (0, 9, 13, 17, 21, 19, 23, 2, 6, 10, 8, 12, 16, 20, 4, 22, 1, 5, 14, 18, 11, 15, 24, 3, 7),
+    (0, 22, 19, 11, 8, 23, 15, 12, 9, 1, 16, 13, 5, 2, 24, 14, 6, 3, 20, 17, 7, 4, 21, 18, 10),
+    (0, 14, 23, 7, 16, 15, 4, 13, 22, 6, 5, 19, 3, 12, 21, 20, 9, 18, 2, 11, 10, 24, 8, 17, 1),
+    (0, 20, 15, 10, 5, 4, 24, 19, 14, 9, 3, 23, 18, 13, 8, 2, 22, 17, 12, 7, 1, 21, 16, 11, 6),
+    (0, 2, 4, 1, 3, 24, 21, 23, 20, 22, 18, 15, 17, 19, 16, 12, 14, 11, 13, 10, 6, 8, 5, 7, 9),
+    (0, 12, 24, 6, 18, 21, 8, 15, 2, 14, 17, 4, 11, 23, 5, 13, 20, 7, 19, 1, 9, 16, 3, 10, 22),
+    (0, 13, 21, 9, 17, 8, 16, 4, 12, 20, 11, 24, 7, 15, 3, 19, 2, 10, 23, 6, 22, 5, 18, 1, 14),
+    (0, 19, 8, 22, 11, 16, 5, 24, 13, 2, 7, 21, 10, 4, 18, 23, 12, 1, 15, 9, 14, 3, 17, 6, 20),
+    (0, 23, 16, 14, 7, 5, 3, 21, 19, 12, 10, 8, 1, 24, 17, 15, 13, 6, 4, 22, 20, 18, 11, 9, 2),
+    (0, 15, 5, 20, 10, 3, 18, 8, 23, 13, 1, 16, 6, 21, 11, 4, 19, 9, 24, 14, 2, 17, 7, 22, 12),
+    (0, 4, 3, 2, 1, 18, 17, 16, 15, 19, 6, 5, 9, 8, 7, 24, 23, 22, 21, 20, 12, 11, 10, 14, 13),
+    (0, 24, 18, 12, 6, 17, 11, 5, 4, 23, 9, 3, 22, 16, 10, 21, 15, 14, 8, 2, 13, 7, 1, 20, 19),
+    (0, 21, 17, 13, 9, 11, 7, 3, 24, 15, 22, 18, 14, 5, 1, 8, 4, 20, 16, 12, 19, 10, 6, 2, 23),
+    (0, 8, 11, 19, 22, 7, 10, 18, 21, 4, 14, 17, 20, 3, 6, 16, 24, 2, 5, 13, 23, 1, 9, 12, 15),
+    (0, 16, 7, 23, 14, 10, 1, 17, 8, 24, 20, 11, 2, 18, 9, 5, 21, 12, 3, 19, 15, 6, 22, 13, 4),
+)
+
+def P1_TO_P0(wide):  # Permute wide in place; lane 0 is never moved. NOTE(review): the name suggests it undoes the Pn[1] arrangement - confirm.
+    t = wide[ 5]  # save the first lane of the chain; every later line overwrites a lane whose old value was already copied
+    wide[ 5] = wide[ 3]
+    wide[ 3] = wide[18]
+    wide[18] = wide[17]
+    wide[17] = wide[11]
+    wide[11] = wide[ 7]
+    wide[ 7] = wide[10]
+    wide[10] = wide[ 1]
+    wide[ 1] = wide[ 6]
+    wide[ 6] = wide[ 9]
+    wide[ 9] = wide[22]
+    wide[22] = wide[14]
+    wide[14] = wide[20]
+    wide[20] = wide[ 2]
+    wide[ 2] = wide[12]
+    wide[12] = wide[13]
+    wide[13] = wide[19]
+    wide[19] = wide[23]
+    wide[23] = wide[15]
+    wide[15] = wide[ 4]
+    wide[ 4] = wide[24]
+    wide[24] = wide[21]
+    wide[21] = wide[ 8]
+    wide[ 8] = wide[16]
+    wide[16] = t  # close the cycle with the saved value
+
+def KF_ELT(wide, r, s, k):  # One round: THETA and RHO through Pn[r], KHI through Pn[s], then mix in constant k.
+    for step, perm in ((THETA, r), (RHO, r), (KHI, s)):
+        step(P(wide, Pn[perm]))
+    # XOR the round constant into lane 0.
+    wide[0] ^= k
+
+def KECCAK_F_1600(wide):  # Apply the full permutation to the 25-lane state, in place.
+    for round_constant in RC:  # RC holds exactly 24 entries, one per round
+        KF_ELT(wide, 0, 1, round_constant)
+        P1_TO_P0(wide)
+
+class keccak:  # Incremental Keccak hasher; out_size is the digest size in bits.
+    def __init__(self, out_size):
+        self.init(out_size)
+
+    def init(self, out_size):  # (Re)initialize the hasher for a fresh message.
+        self.out_size = out_size
+        self.wide = [0] * 25  # sponge state: 25 lanes of 64 bits
+        # Initialization for the "lane complement": these six lanes are
+        # stored inverted while absorbing; digest() flips them back before
+        # extracting the output.
+        self.wide[ 1] = 0xFFFFFFFFFFFFFFFF
+        self.wide[ 2] = 0xFFFFFFFFFFFFFFFF
+        self.wide[ 8] = 0xFFFFFFFFFFFFFFFF
+        self.wide[12] = 0xFFFFFFFFFFFFFFFF
+        self.wide[17] = 0xFFFFFFFFFFFFFFFF
+        self.wide[20] = 0xFFFFFFFFFFFFFFFF
+        self.ptr = 0  # number of pending bytes in self.buf
+        self.lim = 200 - (out_size >> 2)  # block size in bytes (136 for 256-bit output)
+        # 144 bytes is the largest block size digest() accepts (224-bit output).
+        self.buf = bytearray(144)
+
+    def INPUT_BUF(self, buf, size):  # XOR buf[:size] into the state as little-endian 64-bit lanes.
+        for j in range(0, size, 8):
+            self.wide[j >> 3] ^= struct.unpack_from('<Q', buf, j)[0]
+
+    def core(self, data, nlen, lim):  # Absorb the first nlen bytes of data using block size lim.
+        buf = self.buf
+        ptr = self.ptr
+        # Too little input to complete a block: just buffer it.
+        if nlen < (lim - ptr):
+            buf[ptr:ptr + nlen] = data[:nlen]
+            self.ptr = ptr + nlen
+            return
+
+        while nlen > 0:
+            # Copy exactly clen bytes. Assigning a longer sequence to the
+            # slice would silently grow self.buf (digest() passes lim + 1 bytes).
+            clen = min(lim - ptr, nlen)
+            buf[ptr:ptr + clen] = data[:clen]
+            ptr += clen
+            data = data[clen:]
+            nlen -= clen
+            if ptr == lim:
+                self.INPUT_BUF(buf, lim)
+                KECCAK_F_1600(self.wide)
+                ptr = 0
+        self.ptr = ptr
+
+    def write(self, data):  # Absorb more message bytes.
+        self.core(data, len(data), self.lim)  # block size follows out_size (a fixed 136 is only right for 256 bits)
+
+    def digest(self):  # Pad, run the final block, reset the hasher and return the digest as bytes.
+        d = self.out_size // 8
+        lim = {28: 144, 32: 136, 48: 104, 64: 72}[d]  # KeyError for unsupported output sizes
+        utmp = bytearray(lim + 1)
+
+        eb = 1  # first padding byte; the last byte of the block gets 0x80
+        if self.ptr == (lim - 1):
+            utmp[0] = eb | 0x80  # only one byte left: both padding markers share it
+            j = 1
+        else:
+            j = lim - self.ptr
+            utmp[0] = eb
+            utmp[j - 1] = 0x80
+        self.core(utmp, j, lim)
+        # Finalize the "lane complement"
+        self.wide[ 1] = SPH_T64(~self.wide[ 1])
+        self.wide[ 2] = SPH_T64(~self.wide[ 2])
+        self.wide[ 8] = SPH_T64(~self.wide[ 8])
+        self.wide[12] = SPH_T64(~self.wide[12])
+        self.wide[17] = SPH_T64(~self.wide[17])
+        self.wide[20] = SPH_T64(~self.wide[20])
+        for j in range(0, d, 8):
+            utmp[j:j+8] = struct.pack('<Q', self.wide[j >> 3])
+        self.init(d << 3)  # leave the object ready for a new message
+        return bytes(utmp[:d])
+
+keccak256 = lambda: keccak(256)  # factory: returns a new hasher with out_size=256
diff --git a/qa/rpc-tests/test_framework/mininode.py b/qa/rpc-tests/test_framework/mininode.py
index 5b563c58ae1e1..ecd232f551471 100755
--- a/qa/rpc-tests/test_framework/mininode.py
+++ b/qa/rpc-tests/test_framework/mininode.py
@@ -28,6 +28,7 @@
 import time
 import sys
 import random
+from .keccak import keccak256
 from .util import hex_str_to_bytes, bytes_to_hex_str
 from io import BytesIO
 from codecs import encode
@@ -73,9 +74,30 @@ def sha256(s):
 def ripemd160(s):
     return hashlib.new('ripemd160', s).digest()
 
+def hash160(s):  # ripemd160 applied to the sha256 digest of s
+    return ripemd160(sha256(s))
+
 def hash256(s):
     return sha256(sha256(s))
 
+def upgrade_160_hash_to_256(algo):  # Wrap hash function algo so its digest is followed by 12 zero bytes.
+    def algowrapper(s):
+        return algo(s) + (b'\0' * 12)  # 12 extra bytes widen a 20-byte digest to 32 bytes
+    return algowrapper
+
+def keccak(s):  # One-shot digest of s using a fresh keccak256() hasher.
+    hasher = keccak256()
+    hasher.write(s)
+    return hasher.digest()
+
+powalgos = (sha256, hash256, upgrade_160_hash_to_256(ripemd160), upgrade_160_hash_to_256(hash160), keccak)
+def powhash(s, nTime):  # Hash s with the proof-of-work algorithm selected by the timestamp nTime.
+    if nTime < 1296688603:  # earlier timestamps always use double SHA-256
+        algo = hash256
+    else:
+        algo = powalgos[(nTime // 3600) % len(powalgos)]  # rotates every 3600 s; floor division keeps exact integers
+    return algo(s)
+
 def ser_compact_size(l):
     r = b""
     if l < 253:
@@ -577,8 +599,9 @@ def calc_sha256(self):
             r += struct.pack("<I", self.nTime)
             r += struct.pack("<I", self.nBits)
             r += struct.pack("<I", self.nNonce)
-            self.sha256 = uint256_from_str(hash256(r))
-            self.hash = encode(hash256(r)[::-1], 'hex_codec').decode('ascii')
+            rawhash = powhash(r, self.nTime)
+            self.sha256 = uint256_from_str(rawhash)
+            self.hash = encode(rawhash[::-1], 'hex_codec').decode('ascii')
 
     def rehash(self):
         self.sha256 = None
diff --git a/src/Makefile.am b/src/Makefile.am
index 76dbbced6cd4c..4107cb6e5525d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -247,6 +247,7 @@ crypto_libbitcoin_crypto_a_SOURCES = \
   crypto/hmac_sha256.h \
   crypto/hmac_sha512.cpp \
   crypto/hmac_sha512.h \
+  crypto/keccak.cpp \
   crypto/ripemd160.cpp \
   crypto/ripemd160.h \
   crypto/sha1.cpp \
@@ -254,7 +255,9 @@ crypto_libbitcoin_crypto_a_SOURCES = \
   crypto/sha256.cpp \
   crypto/sha256.h \
   crypto/sha512.cpp \
-  crypto/sha512.h
+  crypto/sha512.h \
+  crypto/sph_keccak.h \
+  crypto/sph_types.h
 
 # consensus: shared between all executables that validate any consensus rules.
 libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
@@ -270,7 +273,6 @@ libbitcoin_consensus_a_SOURCES = \
   hash.cpp \
   hash.h \
   prevector.h \
-  primitives/block.cpp \
   primitives/block.h \
   primitives/transaction.cpp \
   primitives/transaction.h \
@@ -306,6 +308,7 @@ libbitcoin_common_a_SOURCES = \
   keystore.cpp \
   netaddress.cpp \
   netbase.cpp \
+  primitives/block.cpp \
   protocol.cpp \
   scheduler.cpp \
   script/sign.cpp \
diff --git a/src/chain.cpp b/src/chain.cpp
index a5b369c4fc4c3..bde337a7f5658 100644
--- a/src/chain.cpp
+++ b/src/chain.cpp
@@ -4,6 +4,8 @@
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #include "chain.h"
+#include "chainparams.h"
+#include "consensus/params.h"
 
 /**
  * CChain implementation
@@ -111,6 +113,26 @@ const CBlockIndex* CBlockIndex::GetAncestor(int height) const
     return const_cast<CBlockIndex*>(this)->GetAncestor(height);
 }
 
+int64_t CBlockIndex::GetEarliestNextBlockTime(const Consensus::Params& consensusParams) const
+{
+    // Returns median-time-past + 1, advanced to the first time that uses this block's PoW algorithm.
+    int64_t nMinTime = GetMedianTimePast() + 1;
+    int64_t nMaxTime = GetBlockTime();
+    const auto current_pow_algo = consensusParams.PowAlgorithmForTime(nMaxTime);
+    if (nMinTime < nMaxTime && current_pow_algo != consensusParams.PowAlgorithmForTime(nMinTime)) {
+        ++nMinTime;  // nMinTime itself is known not to match
+        while (nMinTime < nMaxTime) {
+            const int64_t nTryTime = nMinTime + ((nMaxTime - nMinTime) / 2);
+            if (current_pow_algo != consensusParams.PowAlgorithmForTime(nTryTime)) {
+                nMinTime = nTryTime + 1;
+            } else {
+                nMaxTime = nTryTime;  // nTryTime may itself be the answer, so keep it in range
+            }
+        }
+    }
+    return nMinTime;
+}
+
 void CBlockIndex::BuildSkip()
 {
     if (pprev)
diff --git a/src/chain.h b/src/chain.h
index acb29b667b760..f4b42b97a5e64 100644
--- a/src/chain.h
+++ b/src/chain.h
@@ -306,6 +306,8 @@ class CBlockIndex
         return pbegin[(pend - pbegin)/2];
     }
 
+    int64_t GetEarliestNextBlockTime(const Consensus::Params&) const;
+
     std::string ToString() const
     {
         return strprintf("CBlockIndex(pprev=%p, nHeight=%d, merkle=%s, hashBlock=%s)",
diff --git a/src/chainparams.cpp b/src/chainparams.cpp
index e37ac1dccdefa..6428e15e9e936 100644
--- a/src/chainparams.cpp
+++ b/src/chainparams.cpp
@@ -100,6 +100,7 @@ class CMainParams : public CChainParams {
         consensus.nMinimumChainWork = uint256S("0x0000000000000000000000000000000000000000003f94d1ad391682fe038bf5");
 
         consensus.HardforkTime = std::numeric_limits<int64_t>::max();
+        consensus.nPowChangeTargetShift = 20;
 
         // By default assume that the signatures in ancestors of this block are valid.
         consensus.defaultAssumeValid = uint256S("0x00000000000000000013176bf8d7dfeab4e1db31dc93bc311b436e82ab226b90"); //453354
@@ -117,7 +118,7 @@ class CMainParams : public CChainParams {
         nPruneAfterHeight = 100000;
 
         genesis = CreateGenesisBlock(1231006505, 2083236893, 0x1d00ffff, 1, 50 * COIN);
-        consensus.hashGenesisBlock = genesis.GetHash();
+        consensus.hashGenesisBlock = genesis.GetHash(consensus);
         assert(consensus.hashGenesisBlock == uint256S("0x000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f"));
         assert(genesis.hashMerkleRoot == uint256S("0x4a5e1e4baab89f3a32518a88c31bc87f618f76673e2cc77ab2127b7afdeda33b"));
 
@@ -206,6 +207,7 @@ class CTestNetParams : public CChainParams {
         consensus.nMinimumChainWork = uint256S("0x00000000000000000000000000000000000000000000001f057509eba81aed91");
 
         consensus.HardforkTime = std::numeric_limits<int64_t>::max();
+        consensus.nPowChangeTargetShift = 20;
 
         // By default assume that the signatures in ancestors of this block are valid.
         consensus.defaultAssumeValid = uint256S("0x00000000000128796ee387cf110ccb9d2f36cffaf7f73079c995377c65ac0dcc"); //1079274
@@ -218,7 +220,7 @@ class CTestNetParams : public CChainParams {
         nPruneAfterHeight = 1000;
 
         genesis = CreateGenesisBlock(1296688602, 414098458, 0x1d00ffff, 1, 50 * COIN);
-        consensus.hashGenesisBlock = genesis.GetHash();
+        consensus.hashGenesisBlock = genesis.GetHash(consensus);
         assert(consensus.hashGenesisBlock == uint256S("0x000000000933ea01ad0ee984209779baaec3ced90fa3f408719526f8d77f4943"));
         assert(genesis.hashMerkleRoot == uint256S("0x4a5e1e4baab89f3a32518a88c31bc87f618f76673e2cc77ab2127b7afdeda33b"));
 
@@ -292,7 +294,9 @@ class CRegTestParams : public CChainParams {
         // The best chain should have at least this much work.
         consensus.nMinimumChainWork = uint256S("0x00");
 
-        consensus.HardforkTime = std::numeric_limits<int64_t>::max();
+        consensus.HardforkTime = 1296688603;  // Just past the genesis block
+        consensus.PowChangeAlgo = HashAlgorithm::NUM_HASH_ALGOS;
+        consensus.nPowChangeTargetShift = 20;
 
         // By default assume that the signatures in ancestors of this block are valid.
         consensus.defaultAssumeValid = uint256S("0x00");
@@ -305,7 +309,7 @@ class CRegTestParams : public CChainParams {
         nPruneAfterHeight = 1000;
 
         genesis = CreateGenesisBlock(1296688602, 2, 0x207fffff, 1, 50 * COIN);
-        consensus.hashGenesisBlock = genesis.GetHash();
+        consensus.hashGenesisBlock = genesis.GetHash(consensus);
         assert(consensus.hashGenesisBlock == uint256S("0x0f9188f13cb7b2c71f2a335e3a4fc328bf5beb436012afca590b1a11466e2206"));
         assert(genesis.hashMerkleRoot == uint256S("0x4a5e1e4baab89f3a32518a88c31bc87f618f76673e2cc77ab2127b7afdeda33b"));
 
diff --git a/src/consensus/params.h b/src/consensus/params.h
index 1c5739cffa4d3..ad24d1d47183a 100644
--- a/src/consensus/params.h
+++ b/src/consensus/params.h
@@ -6,6 +6,7 @@
 #ifndef BITCOIN_CONSENSUS_PARAMS_H
 #define BITCOIN_CONSENSUS_PARAMS_H
 
+#include "hash.h"
 #include "uint256.h"
 #include <map>
 #include <string>
@@ -65,6 +66,19 @@ struct Params {
 
     /** Hardfork parameters */
     int64_t HardforkTime;
+    HashAlgorithm PowChangeAlgo;
+    int nPowChangeTargetShift;
+    HashAlgorithm PowAlgorithmForTime(int64_t nTime) const {
+        // Before the hardfork time the algorithm is always SHA256d.
+        if (nTime < HardforkTime) {
+            return HashAlgorithm::SHA256d;
+        }
+        if (PowChangeAlgo != HashAlgorithm::NUM_HASH_ALGOS) {
+            return PowChangeAlgo;
+        }
+        // NUM_HASH_ALGOS indicates a rotating hash algo, for testing
+        return (HashAlgorithm)((nTime / 3600) % (unsigned int)HashAlgorithm::NUM_HASH_ALGOS);
+    }
 
     uint256 defaultAssumeValid;
 };
diff --git a/src/crypto/keccak.cpp b/src/crypto/keccak.cpp
new file mode 100644
index 0000000000000..b6e0284ac24b1
--- /dev/null
+++ b/src/crypto/keccak.cpp
@@ -0,0 +1,1829 @@
+/* $Id: keccak.c 259 2011-07-19 22:11:27Z tp $ */
+/*
+ * Keccak implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+ 
+#include <stddef.h>
+#include <string.h>
+ 
+#include "sph_keccak.h"
+ 
+#ifdef __cplusplus
+extern "C"{
+#endif
+ 
+/*
+ * Parameters:
+ *
+ *  SPH_KECCAK_64          use a 64-bit type
+ *  SPH_KECCAK_UNROLL      number of loops to unroll (0/undef for full unroll)
+ *  SPH_KECCAK_INTERLEAVE  use bit-interleaving (32-bit type only)
+ *  SPH_KECCAK_NOCOPY      do not copy the state into local variables
+ *
+ * If there is no usable 64-bit type, the code automatically switches
+ * back to the 32-bit implementation.
+ *
+ * Some tests on an Intel Core2 Q6600 (both 64-bit and 32-bit, 32 kB L1
+ * code cache), a PowerPC (G3, 32 kB L1 code cache), an ARM920T core
+ * (16 kB L1 code cache), and a small MIPS-compatible CPU (Broadcom BCM3302,
+ * 8 kB L1 code cache), seem to show that the following are optimal:
+ *
+ * -- x86, 64-bit: use the 64-bit implementation, unroll 8 rounds,
+ * do not copy the state; unrolling 2, 6 or all rounds also provides
+ * near-optimal performance.
+ * -- x86, 32-bit: use the 32-bit implementation, unroll 6 rounds,
+ * interleave, do not copy the state. Unrolling 1, 2, 4 or 8 rounds
+ * also provides near-optimal performance.
+ * -- PowerPC: use the 64-bit implementation, unroll 8 rounds,
+ * copy the state. Unrolling 4 or 6 rounds is near-optimal.
+ * -- ARM: use the 64-bit implementation, unroll 2 or 4 rounds,
+ * copy the state.
+ * -- MIPS: use the 64-bit implementation, unroll 2 rounds, copy
+ * the state. Unrolling only 1 round is also near-optimal.
+ *
+ * Also, interleaving does not always yield actual improvements when
+ * using a 32-bit implementation; in particular when the architecture
+ * does not offer a native rotation opcode (interleaving replaces one
+ * 64-bit rotation with two 32-bit rotations, which is a gain only if
+ * there is a native 32-bit rotation opcode and not a native 64-bit
+ * rotation opcode; also, interleaving implies a small overhead when
+ * processing input words).
+ *
+ * To sum up:
+ * -- when possible, use the 64-bit code
+ * -- exception: on 32-bit x86, use 32-bit code
+ * -- when using 32-bit code, use interleaving
+ * -- copy the state, except on x86
+ * -- unroll 8 rounds on "big" machines, 2 rounds on "small" machines
+ */
+ 
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_KECCAK
+#define SPH_SMALL_FOOTPRINT_KECCAK   1
+#endif
+ 
+/*
+ * By default, we select the 64-bit implementation if a 64-bit type
+ * is available, unless a 32-bit x86 is detected.
+ */
+#if !defined SPH_KECCAK_64 && SPH_64 \
+        && !(defined __i386__ || SPH_I386_GCC || SPH_I386_MSVC)
+#define SPH_KECCAK_64   1
+#endif
+ 
+/*
+ * If using a 32-bit implementation, we prefer to interleave.
+ */
+#if !SPH_KECCAK_64 && !defined SPH_KECCAK_INTERLEAVE
+#define SPH_KECCAK_INTERLEAVE   1
+#endif
+ 
+/*
+ * Unroll 8 rounds on big systems, 2 rounds on small systems.
+ */
+#ifndef SPH_KECCAK_UNROLL
+#if SPH_SMALL_FOOTPRINT_KECCAK
+#define SPH_KECCAK_UNROLL   2
+#else
+#define SPH_KECCAK_UNROLL   8
+#endif
+#endif
+ 
+/*
+ * We do not want to copy the state to local variables on x86 (32-bit
+ * and 64-bit alike).
+ */
+#ifndef SPH_KECCAK_NOCOPY
+#if defined __i386__ || defined __x86_64 || SPH_I386_MSVC || SPH_I386_GCC
+#define SPH_KECCAK_NOCOPY   1
+#else
+#define SPH_KECCAK_NOCOPY   0
+#endif
+#endif
+ 
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+ 
+#if SPH_KECCAK_64
+ 
+static const sph_u64 RC[] = {
+        SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
+        SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
+        SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
+        SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
+        SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
+        SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
+        SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
+        SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
+        SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
+        SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
+        SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
+        SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
+};
+ 
+#if SPH_KECCAK_NOCOPY
+ 
+#define a00   (kc->u.wide[ 0])
+#define a10   (kc->u.wide[ 1])
+#define a20   (kc->u.wide[ 2])
+#define a30   (kc->u.wide[ 3])
+#define a40   (kc->u.wide[ 4])
+#define a01   (kc->u.wide[ 5])
+#define a11   (kc->u.wide[ 6])
+#define a21   (kc->u.wide[ 7])
+#define a31   (kc->u.wide[ 8])
+#define a41   (kc->u.wide[ 9])
+#define a02   (kc->u.wide[10])
+#define a12   (kc->u.wide[11])
+#define a22   (kc->u.wide[12])
+#define a32   (kc->u.wide[13])
+#define a42   (kc->u.wide[14])
+#define a03   (kc->u.wide[15])
+#define a13   (kc->u.wide[16])
+#define a23   (kc->u.wide[17])
+#define a33   (kc->u.wide[18])
+#define a43   (kc->u.wide[19])
+#define a04   (kc->u.wide[20])
+#define a14   (kc->u.wide[21])
+#define a24   (kc->u.wide[22])
+#define a34   (kc->u.wide[23])
+#define a44   (kc->u.wide[24])
+ 
+#define DECL_STATE
+#define READ_STATE(sc)
+#define WRITE_STATE(sc)
+ 
+#define INPUT_BUF(size)   do { \
+                size_t j; \
+                for (j = 0; j < (size); j += 8) { \
+                        kc->u.wide[j >> 3] ^= sph_dec64le_aligned(buf + j); \
+                } \
+        } while (0)
+ 
+#define INPUT_BUF144   INPUT_BUF(144)
+#define INPUT_BUF136   INPUT_BUF(136)
+#define INPUT_BUF104   INPUT_BUF(104)
+#define INPUT_BUF72    INPUT_BUF(72)
+ 
+#else
+ 
+#define DECL_STATE \
+        sph_u64 a00, a01, a02, a03, a04; \
+        sph_u64 a10, a11, a12, a13, a14; \
+        sph_u64 a20, a21, a22, a23, a24; \
+        sph_u64 a30, a31, a32, a33, a34; \
+        sph_u64 a40, a41, a42, a43, a44;
+ 
+#define READ_STATE(state)   do { \
+                a00 = (state)->u.wide[ 0]; \
+                a10 = (state)->u.wide[ 1]; \
+                a20 = (state)->u.wide[ 2]; \
+                a30 = (state)->u.wide[ 3]; \
+                a40 = (state)->u.wide[ 4]; \
+                a01 = (state)->u.wide[ 5]; \
+                a11 = (state)->u.wide[ 6]; \
+                a21 = (state)->u.wide[ 7]; \
+                a31 = (state)->u.wide[ 8]; \
+                a41 = (state)->u.wide[ 9]; \
+                a02 = (state)->u.wide[10]; \
+                a12 = (state)->u.wide[11]; \
+                a22 = (state)->u.wide[12]; \
+                a32 = (state)->u.wide[13]; \
+                a42 = (state)->u.wide[14]; \
+                a03 = (state)->u.wide[15]; \
+                a13 = (state)->u.wide[16]; \
+                a23 = (state)->u.wide[17]; \
+                a33 = (state)->u.wide[18]; \
+                a43 = (state)->u.wide[19]; \
+                a04 = (state)->u.wide[20]; \
+                a14 = (state)->u.wide[21]; \
+                a24 = (state)->u.wide[22]; \
+                a34 = (state)->u.wide[23]; \
+                a44 = (state)->u.wide[24]; \
+        } while (0)
+ 
+#define WRITE_STATE(state)   do { \
+                (state)->u.wide[ 0] = a00; \
+                (state)->u.wide[ 1] = a10; \
+                (state)->u.wide[ 2] = a20; \
+                (state)->u.wide[ 3] = a30; \
+                (state)->u.wide[ 4] = a40; \
+                (state)->u.wide[ 5] = a01; \
+                (state)->u.wide[ 6] = a11; \
+                (state)->u.wide[ 7] = a21; \
+                (state)->u.wide[ 8] = a31; \
+                (state)->u.wide[ 9] = a41; \
+                (state)->u.wide[10] = a02; \
+                (state)->u.wide[11] = a12; \
+                (state)->u.wide[12] = a22; \
+                (state)->u.wide[13] = a32; \
+                (state)->u.wide[14] = a42; \
+                (state)->u.wide[15] = a03; \
+                (state)->u.wide[16] = a13; \
+                (state)->u.wide[17] = a23; \
+                (state)->u.wide[18] = a33; \
+                (state)->u.wide[19] = a43; \
+                (state)->u.wide[20] = a04; \
+                (state)->u.wide[21] = a14; \
+                (state)->u.wide[22] = a24; \
+                (state)->u.wide[23] = a34; \
+                (state)->u.wide[24] = a44; \
+        } while (0)
+ 
+#define INPUT_BUF144   do { /* XOR 18 words read via sph_dec64le_aligned at buf+0..buf+136 into a00..a23; `buf` must be in scope */ \
+                a00 ^= sph_dec64le_aligned(buf +   0); \
+                a10 ^= sph_dec64le_aligned(buf +   8); \
+                a20 ^= sph_dec64le_aligned(buf +  16); \
+                a30 ^= sph_dec64le_aligned(buf +  24); \
+                a40 ^= sph_dec64le_aligned(buf +  32); \
+                a01 ^= sph_dec64le_aligned(buf +  40); \
+                a11 ^= sph_dec64le_aligned(buf +  48); \
+                a21 ^= sph_dec64le_aligned(buf +  56); \
+                a31 ^= sph_dec64le_aligned(buf +  64); \
+                a41 ^= sph_dec64le_aligned(buf +  72); \
+                a02 ^= sph_dec64le_aligned(buf +  80); \
+                a12 ^= sph_dec64le_aligned(buf +  88); \
+                a22 ^= sph_dec64le_aligned(buf +  96); \
+                a32 ^= sph_dec64le_aligned(buf + 104); \
+                a42 ^= sph_dec64le_aligned(buf + 112); \
+                a03 ^= sph_dec64le_aligned(buf + 120); \
+                a13 ^= sph_dec64le_aligned(buf + 128); \
+                a23 ^= sph_dec64le_aligned(buf + 136); \
+        } while (0)
+ 
+#define INPUT_BUF136   do { /* XOR 17 words read via sph_dec64le_aligned at buf+0..buf+128 into a00..a13; `buf` must be in scope */ \
+                a00 ^= sph_dec64le_aligned(buf +   0); \
+                a10 ^= sph_dec64le_aligned(buf +   8); \
+                a20 ^= sph_dec64le_aligned(buf +  16); \
+                a30 ^= sph_dec64le_aligned(buf +  24); \
+                a40 ^= sph_dec64le_aligned(buf +  32); \
+                a01 ^= sph_dec64le_aligned(buf +  40); \
+                a11 ^= sph_dec64le_aligned(buf +  48); \
+                a21 ^= sph_dec64le_aligned(buf +  56); \
+                a31 ^= sph_dec64le_aligned(buf +  64); \
+                a41 ^= sph_dec64le_aligned(buf +  72); \
+                a02 ^= sph_dec64le_aligned(buf +  80); \
+                a12 ^= sph_dec64le_aligned(buf +  88); \
+                a22 ^= sph_dec64le_aligned(buf +  96); \
+                a32 ^= sph_dec64le_aligned(buf + 104); \
+                a42 ^= sph_dec64le_aligned(buf + 112); \
+                a03 ^= sph_dec64le_aligned(buf + 120); \
+                a13 ^= sph_dec64le_aligned(buf + 128); \
+        } while (0)
+ 
+#define INPUT_BUF104   do { /* XOR 13 words read via sph_dec64le_aligned at buf+0..buf+96 into a00..a22; `buf` must be in scope */ \
+                a00 ^= sph_dec64le_aligned(buf +   0); \
+                a10 ^= sph_dec64le_aligned(buf +   8); \
+                a20 ^= sph_dec64le_aligned(buf +  16); \
+                a30 ^= sph_dec64le_aligned(buf +  24); \
+                a40 ^= sph_dec64le_aligned(buf +  32); \
+                a01 ^= sph_dec64le_aligned(buf +  40); \
+                a11 ^= sph_dec64le_aligned(buf +  48); \
+                a21 ^= sph_dec64le_aligned(buf +  56); \
+                a31 ^= sph_dec64le_aligned(buf +  64); \
+                a41 ^= sph_dec64le_aligned(buf +  72); \
+                a02 ^= sph_dec64le_aligned(buf +  80); \
+                a12 ^= sph_dec64le_aligned(buf +  88); \
+                a22 ^= sph_dec64le_aligned(buf +  96); \
+        } while (0)
+ 
+#define INPUT_BUF72   do { /* XOR 9 words read via sph_dec64le_aligned at buf+0..buf+64 into a00..a31; `buf` must be in scope */ \
+                a00 ^= sph_dec64le_aligned(buf +   0); \
+                a10 ^= sph_dec64le_aligned(buf +   8); \
+                a20 ^= sph_dec64le_aligned(buf +  16); \
+                a30 ^= sph_dec64le_aligned(buf +  24); \
+                a40 ^= sph_dec64le_aligned(buf +  32); \
+                a01 ^= sph_dec64le_aligned(buf +  40); \
+                a11 ^= sph_dec64le_aligned(buf +  48); \
+                a21 ^= sph_dec64le_aligned(buf +  56); \
+                a31 ^= sph_dec64le_aligned(buf +  64); \
+        } while (0)
+ 
+#define INPUT_BUF(lim)   do { /* generic absorb: XOR words from buf into the lanes, stopping after `lim` bytes for lim = 72, 104 or 136 */ \
+                a00 ^= sph_dec64le_aligned(buf +   0); \
+                a10 ^= sph_dec64le_aligned(buf +   8); \
+                a20 ^= sph_dec64le_aligned(buf +  16); \
+                a30 ^= sph_dec64le_aligned(buf +  24); \
+                a40 ^= sph_dec64le_aligned(buf +  32); \
+                a01 ^= sph_dec64le_aligned(buf +  40); \
+                a11 ^= sph_dec64le_aligned(buf +  48); \
+                a21 ^= sph_dec64le_aligned(buf +  56); \
+                a31 ^= sph_dec64le_aligned(buf +  64); \
+                if ((lim) == 72) /* `break` leaves the enclosing do { } while (0) */ \
+                        break; \
+                a41 ^= sph_dec64le_aligned(buf +  72); \
+                a02 ^= sph_dec64le_aligned(buf +  80); \
+                a12 ^= sph_dec64le_aligned(buf +  88); \
+                a22 ^= sph_dec64le_aligned(buf +  96); \
+                if ((lim) == 104) \
+                        break; \
+                a32 ^= sph_dec64le_aligned(buf + 104); \
+                a42 ^= sph_dec64le_aligned(buf + 112); \
+                a03 ^= sph_dec64le_aligned(buf + 120); \
+                a13 ^= sph_dec64le_aligned(buf + 128); \
+                if ((lim) == 136) \
+                        break; \
+                a23 ^= sph_dec64le_aligned(buf + 136); /* reached for any other lim: all 18 words (144 bytes) are absorbed */ \
+        } while (0)
+ 
+#endif
+ 
+#define DECL64(x)        sph_u64 x /* lane primitives for the build where one lane is a single sph_u64 */
+#define MOV64(d, s)      (d = s)
+#define XOR64(d, a, b)   (d = a ^ b)
+#define AND64(d, a, b)   (d = a & b)
+#define OR64(d, a, b)    (d = a | b)
+#define NOT64(d, s)      (d = SPH_T64(~s)) /* SPH_T64 applied to the complement; presumably truncates to 64 bits - defined outside this view */
+#define ROL64(d, v, n)   (d = SPH_ROTL64(v, n))
+#define XOR64_IOTA       XOR64 /* with whole-word lanes the iota XOR is just XOR64 */
+ 
+#else
+ 
+static const struct { /* 24 constants, each stored as a pair of 32-bit words; consumed as k.high / k.low by XOR64_IOTA */
+        sph_u32 high, low;
+} RC[] = {
+#if SPH_KECCAK_INTERLEAVE /* NOTE(review): presumably the same 24 constants as the #else table, pre-converted to the interleaved layout - confirm */
+        { SPH_C32(0x00000000), SPH_C32(0x00000001) },
+        { SPH_C32(0x00000089), SPH_C32(0x00000000) },
+        { SPH_C32(0x8000008B), SPH_C32(0x00000000) },
+        { SPH_C32(0x80008080), SPH_C32(0x00000000) },
+        { SPH_C32(0x0000008B), SPH_C32(0x00000001) },
+        { SPH_C32(0x00008000), SPH_C32(0x00000001) },
+        { SPH_C32(0x80008088), SPH_C32(0x00000001) },
+        { SPH_C32(0x80000082), SPH_C32(0x00000001) },
+        { SPH_C32(0x0000000B), SPH_C32(0x00000000) },
+        { SPH_C32(0x0000000A), SPH_C32(0x00000000) },
+        { SPH_C32(0x00008082), SPH_C32(0x00000001) },
+        { SPH_C32(0x00008003), SPH_C32(0x00000000) },
+        { SPH_C32(0x0000808B), SPH_C32(0x00000001) },
+        { SPH_C32(0x8000000B), SPH_C32(0x00000001) },
+        { SPH_C32(0x8000008A), SPH_C32(0x00000001) },
+        { SPH_C32(0x80000081), SPH_C32(0x00000001) },
+        { SPH_C32(0x80000081), SPH_C32(0x00000000) },
+        { SPH_C32(0x80000008), SPH_C32(0x00000000) },
+        { SPH_C32(0x00000083), SPH_C32(0x00000000) },
+        { SPH_C32(0x80008003), SPH_C32(0x00000000) },
+        { SPH_C32(0x80008088), SPH_C32(0x00000001) },
+        { SPH_C32(0x80000088), SPH_C32(0x00000000) },
+        { SPH_C32(0x00008000), SPH_C32(0x00000001) },
+        { SPH_C32(0x80008082), SPH_C32(0x00000000) }
+#else /* plain layout: high = upper 32 bits, low = lower 32 bits of each 64-bit constant */
+        { SPH_C32(0x00000000), SPH_C32(0x00000001) },
+        { SPH_C32(0x00000000), SPH_C32(0x00008082) },
+        { SPH_C32(0x80000000), SPH_C32(0x0000808A) },
+        { SPH_C32(0x80000000), SPH_C32(0x80008000) },
+        { SPH_C32(0x00000000), SPH_C32(0x0000808B) },
+        { SPH_C32(0x00000000), SPH_C32(0x80000001) },
+        { SPH_C32(0x80000000), SPH_C32(0x80008081) },
+        { SPH_C32(0x80000000), SPH_C32(0x00008009) },
+        { SPH_C32(0x00000000), SPH_C32(0x0000008A) },
+        { SPH_C32(0x00000000), SPH_C32(0x00000088) },
+        { SPH_C32(0x00000000), SPH_C32(0x80008009) },
+        { SPH_C32(0x00000000), SPH_C32(0x8000000A) },
+        { SPH_C32(0x00000000), SPH_C32(0x8000808B) },
+        { SPH_C32(0x80000000), SPH_C32(0x0000008B) },
+        { SPH_C32(0x80000000), SPH_C32(0x00008089) },
+        { SPH_C32(0x80000000), SPH_C32(0x00008003) },
+        { SPH_C32(0x80000000), SPH_C32(0x00008002) },
+        { SPH_C32(0x80000000), SPH_C32(0x00000080) },
+        { SPH_C32(0x00000000), SPH_C32(0x0000800A) },
+        { SPH_C32(0x80000000), SPH_C32(0x8000000A) },
+        { SPH_C32(0x80000000), SPH_C32(0x80008081) },
+        { SPH_C32(0x80000000), SPH_C32(0x00008080) },
+        { SPH_C32(0x00000000), SPH_C32(0x80000001) },
+        { SPH_C32(0x80000000), SPH_C32(0x80008008) }
+#endif
+};
+ 
+#if SPH_KECCAK_INTERLEAVE
+ 
+#define INTERLEAVE(xl, xh)   do { /* permute the bits of the word pair (xl, xh) in place; both arguments must be lvalues */ \
+                sph_u32 l, h, t; \
+                l = (xl); h = (xh); \
+                t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); /* each step swaps the bits selected by the mask with the bits `shift` positions above them */ \
+                t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \
+                t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
+                t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
+                t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \
+                t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \
+                t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \
+                t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \
+                t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); /* exchange the top 16 bits of l with the bottom 16 bits of h */ \
+                l ^= t; h ^= t >> 16; \
+                (xl) = l; (xh) = h; /* NOTE(review): net effect looks like even-position bits -> xl, odd-position bits -> xh; confirm */ \
+        } while (0)
+ 
+#define UNINTERLEAVE(xl, xh)   do { /* applies the same swap steps as INTERLEAVE in the opposite order, undoing it; arguments must be lvalues */ \
+                sph_u32 l, h, t; \
+                l = (xl); h = (xh); \
+                t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); /* exchange the top 16 bits of l with the bottom 16 bits of h */ \
+                l ^= t; h ^= t >> 16; \
+                t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \
+                t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \
+                t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \
+                t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \
+                t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
+                t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
+                t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \
+                t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \
+                (xl) = l; (xh) = h; \
+        } while (0)
+ 
+#else
+ 
+#define INTERLEAVE(l, h)     /* expands to nothing when SPH_KECCAK_INTERLEAVE is false */
+#define UNINTERLEAVE(l, h)   /* expands to nothing when SPH_KECCAK_INTERLEAVE is false */
+ 
+#endif
+ 
+#if SPH_KECCAK_NOCOPY
+ 
+#define a00l   (kc->u.narrow[2 *  0 + 0]) /* SPH_KECCAK_NOCOPY: lane halves alias kc->u.narrow directly; lane i uses word 2*i for `l` and 2*i+1 for `h` */
+#define a00h   (kc->u.narrow[2 *  0 + 1]) /* a variable named `kc` must be in scope wherever these names are used */
+#define a10l   (kc->u.narrow[2 *  1 + 0])
+#define a10h   (kc->u.narrow[2 *  1 + 1])
+#define a20l   (kc->u.narrow[2 *  2 + 0])
+#define a20h   (kc->u.narrow[2 *  2 + 1])
+#define a30l   (kc->u.narrow[2 *  3 + 0])
+#define a30h   (kc->u.narrow[2 *  3 + 1])
+#define a40l   (kc->u.narrow[2 *  4 + 0])
+#define a40h   (kc->u.narrow[2 *  4 + 1])
+#define a01l   (kc->u.narrow[2 *  5 + 0])
+#define a01h   (kc->u.narrow[2 *  5 + 1])
+#define a11l   (kc->u.narrow[2 *  6 + 0])
+#define a11h   (kc->u.narrow[2 *  6 + 1])
+#define a21l   (kc->u.narrow[2 *  7 + 0])
+#define a21h   (kc->u.narrow[2 *  7 + 1])
+#define a31l   (kc->u.narrow[2 *  8 + 0])
+#define a31h   (kc->u.narrow[2 *  8 + 1])
+#define a41l   (kc->u.narrow[2 *  9 + 0])
+#define a41h   (kc->u.narrow[2 *  9 + 1])
+#define a02l   (kc->u.narrow[2 * 10 + 0])
+#define a02h   (kc->u.narrow[2 * 10 + 1])
+#define a12l   (kc->u.narrow[2 * 11 + 0])
+#define a12h   (kc->u.narrow[2 * 11 + 1])
+#define a22l   (kc->u.narrow[2 * 12 + 0])
+#define a22h   (kc->u.narrow[2 * 12 + 1])
+#define a32l   (kc->u.narrow[2 * 13 + 0])
+#define a32h   (kc->u.narrow[2 * 13 + 1])
+#define a42l   (kc->u.narrow[2 * 14 + 0])
+#define a42h   (kc->u.narrow[2 * 14 + 1])
+#define a03l   (kc->u.narrow[2 * 15 + 0])
+#define a03h   (kc->u.narrow[2 * 15 + 1])
+#define a13l   (kc->u.narrow[2 * 16 + 0])
+#define a13h   (kc->u.narrow[2 * 16 + 1])
+#define a23l   (kc->u.narrow[2 * 17 + 0])
+#define a23h   (kc->u.narrow[2 * 17 + 1])
+#define a33l   (kc->u.narrow[2 * 18 + 0])
+#define a33h   (kc->u.narrow[2 * 18 + 1])
+#define a43l   (kc->u.narrow[2 * 19 + 0])
+#define a43h   (kc->u.narrow[2 * 19 + 1])
+#define a04l   (kc->u.narrow[2 * 20 + 0])
+#define a04h   (kc->u.narrow[2 * 20 + 1])
+#define a14l   (kc->u.narrow[2 * 21 + 0])
+#define a14h   (kc->u.narrow[2 * 21 + 1])
+#define a24l   (kc->u.narrow[2 * 22 + 0])
+#define a24h   (kc->u.narrow[2 * 22 + 1])
+#define a34l   (kc->u.narrow[2 * 23 + 0])
+#define a34h   (kc->u.narrow[2 * 23 + 1])
+#define a44l   (kc->u.narrow[2 * 24 + 0])
+#define a44h   (kc->u.narrow[2 * 24 + 1])
+ 
+#define DECL_STATE           /* empty: with SPH_KECCAK_NOCOPY the aXYl/aXYh names are macros over kc->u.narrow, so no locals are declared */
+#define READ_STATE(state)    /* empty: nothing to load */
+#define WRITE_STATE(state)   /* empty: nothing to store back */
+ 
+#define INPUT_BUF(size)   do { /* absorb `size` bytes from buf, 8 bytes per iteration, XORing straight into kc->u.narrow; needs `buf` and `kc` in scope */ \
+                size_t j; \
+                for (j = 0; j < (size); j += 8) { \
+                        sph_u32 tl, th; \
+                        tl = sph_dec32le_aligned(buf + j + 0); \
+                        th = sph_dec32le_aligned(buf + j + 4); \
+                        INTERLEAVE(tl, th); \
+                        kc->u.narrow[(j >> 2) + 0] ^= tl; /* j >> 2 == 2 * (j / 8): index of the first 32-bit word of lane j / 8 */ \
+                        kc->u.narrow[(j >> 2) + 1] ^= th; \
+                } \
+        } while (0)
+ 
+#define INPUT_BUF144   INPUT_BUF(144) /* fixed-size entry points forward to the loop-based INPUT_BUF(size) */
+#define INPUT_BUF136   INPUT_BUF(136)
+#define INPUT_BUF104   INPUT_BUF(104)
+#define INPUT_BUF72    INPUT_BUF(72)
+ 
+#else
+ 
+#define DECL_STATE /* declare 50 sph_u32 locals: an `l` and an `h` half for each of the 25 lanes a00..a44 */ \
+        sph_u32 a00l, a00h, a01l, a01h, a02l, a02h, a03l, a03h, a04l, a04h; \
+        sph_u32 a10l, a10h, a11l, a11h, a12l, a12h, a13l, a13h, a14l, a14h; \
+        sph_u32 a20l, a20h, a21l, a21h, a22l, a22h, a23l, a23h, a24l, a24h; \
+        sph_u32 a30l, a30h, a31l, a31h, a32l, a32h, a33l, a33h, a34l, a34h; \
+        sph_u32 a40l, a40h, a41l, a41h, a42l, a42h, a43l, a43h, a44l, a44h;
+ 
+#define READ_STATE(state)   do { /* load the lane halves from (state)->u.narrow: word 2*i -> `l`, word 2*i+1 -> `h` of lane i */ \
+                a00l = (state)->u.narrow[2 *  0 + 0]; \
+                a00h = (state)->u.narrow[2 *  0 + 1]; \
+                a10l = (state)->u.narrow[2 *  1 + 0]; \
+                a10h = (state)->u.narrow[2 *  1 + 1]; \
+                a20l = (state)->u.narrow[2 *  2 + 0]; \
+                a20h = (state)->u.narrow[2 *  2 + 1]; \
+                a30l = (state)->u.narrow[2 *  3 + 0]; \
+                a30h = (state)->u.narrow[2 *  3 + 1]; \
+                a40l = (state)->u.narrow[2 *  4 + 0]; \
+                a40h = (state)->u.narrow[2 *  4 + 1]; \
+                a01l = (state)->u.narrow[2 *  5 + 0]; \
+                a01h = (state)->u.narrow[2 *  5 + 1]; \
+                a11l = (state)->u.narrow[2 *  6 + 0]; \
+                a11h = (state)->u.narrow[2 *  6 + 1]; \
+                a21l = (state)->u.narrow[2 *  7 + 0]; \
+                a21h = (state)->u.narrow[2 *  7 + 1]; \
+                a31l = (state)->u.narrow[2 *  8 + 0]; \
+                a31h = (state)->u.narrow[2 *  8 + 1]; \
+                a41l = (state)->u.narrow[2 *  9 + 0]; \
+                a41h = (state)->u.narrow[2 *  9 + 1]; \
+                a02l = (state)->u.narrow[2 * 10 + 0]; \
+                a02h = (state)->u.narrow[2 * 10 + 1]; \
+                a12l = (state)->u.narrow[2 * 11 + 0]; \
+                a12h = (state)->u.narrow[2 * 11 + 1]; \
+                a22l = (state)->u.narrow[2 * 12 + 0]; \
+                a22h = (state)->u.narrow[2 * 12 + 1]; \
+                a32l = (state)->u.narrow[2 * 13 + 0]; \
+                a32h = (state)->u.narrow[2 * 13 + 1]; \
+                a42l = (state)->u.narrow[2 * 14 + 0]; \
+                a42h = (state)->u.narrow[2 * 14 + 1]; \
+                a03l = (state)->u.narrow[2 * 15 + 0]; \
+                a03h = (state)->u.narrow[2 * 15 + 1]; \
+                a13l = (state)->u.narrow[2 * 16 + 0]; \
+                a13h = (state)->u.narrow[2 * 16 + 1]; \
+                a23l = (state)->u.narrow[2 * 17 + 0]; \
+                a23h = (state)->u.narrow[2 * 17 + 1]; \
+                a33l = (state)->u.narrow[2 * 18 + 0]; \
+                a33h = (state)->u.narrow[2 * 18 + 1]; \
+                a43l = (state)->u.narrow[2 * 19 + 0]; \
+                a43h = (state)->u.narrow[2 * 19 + 1]; \
+                a04l = (state)->u.narrow[2 * 20 + 0]; \
+                a04h = (state)->u.narrow[2 * 20 + 1]; \
+                a14l = (state)->u.narrow[2 * 21 + 0]; \
+                a14h = (state)->u.narrow[2 * 21 + 1]; \
+                a24l = (state)->u.narrow[2 * 22 + 0]; \
+                a24h = (state)->u.narrow[2 * 22 + 1]; \
+                a34l = (state)->u.narrow[2 * 23 + 0]; \
+                a34h = (state)->u.narrow[2 * 23 + 1]; \
+                a44l = (state)->u.narrow[2 * 24 + 0]; \
+                a44h = (state)->u.narrow[2 * 24 + 1]; \
+        } while (0)
+ 
+#define WRITE_STATE(state)   do { /* store the lane halves back into (state)->u.narrow: `l` -> word 2*i, `h` -> word 2*i+1 of lane i */ \
+                (state)->u.narrow[2 *  0 + 0] = a00l; \
+                (state)->u.narrow[2 *  0 + 1] = a00h; \
+                (state)->u.narrow[2 *  1 + 0] = a10l; \
+                (state)->u.narrow[2 *  1 + 1] = a10h; \
+                (state)->u.narrow[2 *  2 + 0] = a20l; \
+                (state)->u.narrow[2 *  2 + 1] = a20h; \
+                (state)->u.narrow[2 *  3 + 0] = a30l; \
+                (state)->u.narrow[2 *  3 + 1] = a30h; \
+                (state)->u.narrow[2 *  4 + 0] = a40l; \
+                (state)->u.narrow[2 *  4 + 1] = a40h; \
+                (state)->u.narrow[2 *  5 + 0] = a01l; \
+                (state)->u.narrow[2 *  5 + 1] = a01h; \
+                (state)->u.narrow[2 *  6 + 0] = a11l; \
+                (state)->u.narrow[2 *  6 + 1] = a11h; \
+                (state)->u.narrow[2 *  7 + 0] = a21l; \
+                (state)->u.narrow[2 *  7 + 1] = a21h; \
+                (state)->u.narrow[2 *  8 + 0] = a31l; \
+                (state)->u.narrow[2 *  8 + 1] = a31h; \
+                (state)->u.narrow[2 *  9 + 0] = a41l; \
+                (state)->u.narrow[2 *  9 + 1] = a41h; \
+                (state)->u.narrow[2 * 10 + 0] = a02l; \
+                (state)->u.narrow[2 * 10 + 1] = a02h; \
+                (state)->u.narrow[2 * 11 + 0] = a12l; \
+                (state)->u.narrow[2 * 11 + 1] = a12h; \
+                (state)->u.narrow[2 * 12 + 0] = a22l; \
+                (state)->u.narrow[2 * 12 + 1] = a22h; \
+                (state)->u.narrow[2 * 13 + 0] = a32l; \
+                (state)->u.narrow[2 * 13 + 1] = a32h; \
+                (state)->u.narrow[2 * 14 + 0] = a42l; \
+                (state)->u.narrow[2 * 14 + 1] = a42h; \
+                (state)->u.narrow[2 * 15 + 0] = a03l; \
+                (state)->u.narrow[2 * 15 + 1] = a03h; \
+                (state)->u.narrow[2 * 16 + 0] = a13l; \
+                (state)->u.narrow[2 * 16 + 1] = a13h; \
+                (state)->u.narrow[2 * 17 + 0] = a23l; \
+                (state)->u.narrow[2 * 17 + 1] = a23h; \
+                (state)->u.narrow[2 * 18 + 0] = a33l; \
+                (state)->u.narrow[2 * 18 + 1] = a33h; \
+                (state)->u.narrow[2 * 19 + 0] = a43l; \
+                (state)->u.narrow[2 * 19 + 1] = a43h; \
+                (state)->u.narrow[2 * 20 + 0] = a04l; \
+                (state)->u.narrow[2 * 20 + 1] = a04h; \
+                (state)->u.narrow[2 * 21 + 0] = a14l; \
+                (state)->u.narrow[2 * 21 + 1] = a14h; \
+                (state)->u.narrow[2 * 22 + 0] = a24l; \
+                (state)->u.narrow[2 * 22 + 1] = a24h; \
+                (state)->u.narrow[2 * 23 + 0] = a34l; \
+                (state)->u.narrow[2 * 23 + 1] = a34h; \
+                (state)->u.narrow[2 * 24 + 0] = a44l; \
+                (state)->u.narrow[2 * 24 + 1] = a44h; \
+        } while (0)
+ 
+#define READ64(d, off)   do { /* read two 32-bit words at buf+off and buf+off+4, INTERLEAVE them, and XOR them into d##l / d##h */ \
+                sph_u32 tl, th; \
+                tl = sph_dec32le_aligned(buf + (off)); \
+                th = sph_dec32le_aligned(buf + (off) + 4); \
+                INTERLEAVE(tl, th); \
+                d ## l ^= tl; /* note: XOR-accumulates into the lane, it does not overwrite it */ \
+                d ## h ^= th; \
+        } while (0)
+ 
+#define INPUT_BUF144   do { /* absorb 144 bytes: READ64 for the 18 lanes a00..a23 at offsets 0..136 */ \
+                READ64(a00,   0); \
+                READ64(a10,   8); \
+                READ64(a20,  16); \
+                READ64(a30,  24); \
+                READ64(a40,  32); \
+                READ64(a01,  40); \
+                READ64(a11,  48); \
+                READ64(a21,  56); \
+                READ64(a31,  64); \
+                READ64(a41,  72); \
+                READ64(a02,  80); \
+                READ64(a12,  88); \
+                READ64(a22,  96); \
+                READ64(a32, 104); \
+                READ64(a42, 112); \
+                READ64(a03, 120); \
+                READ64(a13, 128); \
+                READ64(a23, 136); \
+        } while (0)
+ 
+#define INPUT_BUF136   do { /* absorb 136 bytes: READ64 for the 17 lanes a00..a13 at offsets 0..128 */ \
+                READ64(a00,   0); \
+                READ64(a10,   8); \
+                READ64(a20,  16); \
+                READ64(a30,  24); \
+                READ64(a40,  32); \
+                READ64(a01,  40); \
+                READ64(a11,  48); \
+                READ64(a21,  56); \
+                READ64(a31,  64); \
+                READ64(a41,  72); \
+                READ64(a02,  80); \
+                READ64(a12,  88); \
+                READ64(a22,  96); \
+                READ64(a32, 104); \
+                READ64(a42, 112); \
+                READ64(a03, 120); \
+                READ64(a13, 128); \
+        } while (0)
+ 
+#define INPUT_BUF104   do { /* absorb 104 bytes: READ64 for the 13 lanes a00..a22 at offsets 0..96 */ \
+                READ64(a00,   0); \
+                READ64(a10,   8); \
+                READ64(a20,  16); \
+                READ64(a30,  24); \
+                READ64(a40,  32); \
+                READ64(a01,  40); \
+                READ64(a11,  48); \
+                READ64(a21,  56); \
+                READ64(a31,  64); \
+                READ64(a41,  72); \
+                READ64(a02,  80); \
+                READ64(a12,  88); \
+                READ64(a22,  96); \
+        } while (0)
+ 
+#define INPUT_BUF72   do { /* absorb 72 bytes: READ64 for the 9 lanes a00..a31 at offsets 0..64 */ \
+                READ64(a00,   0); \
+                READ64(a10,   8); \
+                READ64(a20,  16); \
+                READ64(a30,  24); \
+                READ64(a40,  32); \
+                READ64(a01,  40); \
+                READ64(a11,  48); \
+                READ64(a21,  56); \
+                READ64(a31,  64); \
+        } while (0)
+ 
+#define INPUT_BUF(lim)   do { /* generic absorb via READ64, stopping after `lim` bytes for lim = 72, 104 or 136 */ \
+                READ64(a00,   0); \
+                READ64(a10,   8); \
+                READ64(a20,  16); \
+                READ64(a30,  24); \
+                READ64(a40,  32); \
+                READ64(a01,  40); \
+                READ64(a11,  48); \
+                READ64(a21,  56); \
+                READ64(a31,  64); \
+                if ((lim) == 72) /* `break` leaves the enclosing do { } while (0) */ \
+                        break; \
+                READ64(a41,  72); \
+                READ64(a02,  80); \
+                READ64(a12,  88); \
+                READ64(a22,  96); \
+                if ((lim) == 104) \
+                        break; \
+                READ64(a32, 104); \
+                READ64(a42, 112); \
+                READ64(a03, 120); \
+                READ64(a13, 128); \
+                if ((lim) == 136) \
+                        break; \
+                READ64(a23, 136); /* reached for any other lim: all 18 lanes (144 bytes) are absorbed */ \
+        } while (0)
+ 
+#endif
+ 
+#define DECL64(x)        sph_u32 x ## l, x ## h /* split-lane temporaries: two sph_u32 halves, the same type as the DECL_STATE locals and the ROL64 scratch words */
+#define MOV64(d, s)      (d ## l = s ## l, d ## h = s ## h) /* every primitive below acts on the l and h halves independently */
+#define XOR64(d, a, b)   (d ## l = a ## l ^ b ## l, d ## h = a ## h ^ b ## h)
+#define AND64(d, a, b)   (d ## l = a ## l & b ## l, d ## h = a ## h & b ## h)
+#define OR64(d, a, b)    (d ## l = a ## l | b ## l, d ## h = a ## h | b ## h)
+#define NOT64(d, s)      (d ## l = SPH_T32(~s ## l), d ## h = SPH_T32(~s ## h)) /* SPH_T32 applied after the complement of each half */
+#define ROL64(d, v, n)   ROL64_ ## n(d, v) /* n must be a literal 0..63: it is pasted to select ROL64_<n> */
+ 
+#if SPH_KECCAK_INTERLEAVE
+ 
+#define ROL64_odd1(d, v)   do { /* interleaved rotate-left by 1: new l = h rotated left by 1, new h = old l */ \
+                sph_u32 tmp; \
+                tmp = v ## l; /* saved first so that d may be the same lane as v */ \
+                d ## l = SPH_T32(v ## h << 1) | (v ## h >> 31); \
+                d ## h = tmp; \
+        } while (0)
+ 
+#define ROL64_odd63(d, v)   do { /* interleaved rotate-left by 63: new l = old h, new h = l rotated left by 31 */ \
+                sph_u32 tmp; \
+                tmp = SPH_T32(v ## l << 31) | (v ## l >> 1); /* computed first so that d may be the same lane as v */ \
+                d ## l = v ## h; \
+                d ## h = tmp; \
+        } while (0)
+ 
+#define ROL64_odd(d, v, n)   do { /* interleaved rotate-left by 2n-1 (see the ROL64_<k> table): new l = h rotl n, new h = l rotl (n-1) */ \
+                sph_u32 tmp; \
+                tmp = SPH_T32(v ## l << (n - 1)) | (v ## l >> (33 - n)); /* n = 1 would shift right by 32 here, hence the separate ROL64_odd1 */ \
+                d ## l = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); /* n = 32 would shift by 32 / 0 here, hence the separate ROL64_odd63 */ \
+                d ## h = tmp; \
+        } while (0)
+ 
+#define ROL64_even(d, v, n)   do { /* interleaved rotate-left by 2n (see the ROL64_<k> table): each half is rotated left by n on its own */ \
+                d ## l = SPH_T32(v ## l << n) | (v ## l >> (32 - n)); \
+                d ## h = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \
+        } while (0)
+ 
+#define ROL64_0(d, v)    /* interleaved dispatch table: ROL64(d, v, k) pastes to ROL64_<k>; rotation by 0 expands to nothing */
+#define ROL64_1(d, v)    ROL64_odd1(d, v)
+#define ROL64_2(d, v)    ROL64_even(d, v,  1) /* even k -> ROL64_even with n = k / 2 */
+#define ROL64_3(d, v)    ROL64_odd( d, v,  2) /* odd k (3..61) -> ROL64_odd with n = (k + 1) / 2 */
+#define ROL64_4(d, v)    ROL64_even(d, v,  2)
+#define ROL64_5(d, v)    ROL64_odd( d, v,  3)
+#define ROL64_6(d, v)    ROL64_even(d, v,  3)
+#define ROL64_7(d, v)    ROL64_odd( d, v,  4)
+#define ROL64_8(d, v)    ROL64_even(d, v,  4)
+#define ROL64_9(d, v)    ROL64_odd( d, v,  5)
+#define ROL64_10(d, v)   ROL64_even(d, v,  5)
+#define ROL64_11(d, v)   ROL64_odd( d, v,  6)
+#define ROL64_12(d, v)   ROL64_even(d, v,  6)
+#define ROL64_13(d, v)   ROL64_odd( d, v,  7)
+#define ROL64_14(d, v)   ROL64_even(d, v,  7)
+#define ROL64_15(d, v)   ROL64_odd( d, v,  8)
+#define ROL64_16(d, v)   ROL64_even(d, v,  8)
+#define ROL64_17(d, v)   ROL64_odd( d, v,  9)
+#define ROL64_18(d, v)   ROL64_even(d, v,  9)
+#define ROL64_19(d, v)   ROL64_odd( d, v, 10)
+#define ROL64_20(d, v)   ROL64_even(d, v, 10)
+#define ROL64_21(d, v)   ROL64_odd( d, v, 11)
+#define ROL64_22(d, v)   ROL64_even(d, v, 11)
+#define ROL64_23(d, v)   ROL64_odd( d, v, 12)
+#define ROL64_24(d, v)   ROL64_even(d, v, 12)
+#define ROL64_25(d, v)   ROL64_odd( d, v, 13)
+#define ROL64_26(d, v)   ROL64_even(d, v, 13)
+#define ROL64_27(d, v)   ROL64_odd( d, v, 14)
+#define ROL64_28(d, v)   ROL64_even(d, v, 14)
+#define ROL64_29(d, v)   ROL64_odd( d, v, 15)
+#define ROL64_30(d, v)   ROL64_even(d, v, 15)
+#define ROL64_31(d, v)   ROL64_odd( d, v, 16)
+#define ROL64_32(d, v)   ROL64_even(d, v, 16)
+#define ROL64_33(d, v)   ROL64_odd( d, v, 17)
+#define ROL64_34(d, v)   ROL64_even(d, v, 17)
+#define ROL64_35(d, v)   ROL64_odd( d, v, 18)
+#define ROL64_36(d, v)   ROL64_even(d, v, 18)
+#define ROL64_37(d, v)   ROL64_odd( d, v, 19)
+#define ROL64_38(d, v)   ROL64_even(d, v, 19)
+#define ROL64_39(d, v)   ROL64_odd( d, v, 20)
+#define ROL64_40(d, v)   ROL64_even(d, v, 20)
+#define ROL64_41(d, v)   ROL64_odd( d, v, 21)
+#define ROL64_42(d, v)   ROL64_even(d, v, 21)
+#define ROL64_43(d, v)   ROL64_odd( d, v, 22)
+#define ROL64_44(d, v)   ROL64_even(d, v, 22)
+#define ROL64_45(d, v)   ROL64_odd( d, v, 23)
+#define ROL64_46(d, v)   ROL64_even(d, v, 23)
+#define ROL64_47(d, v)   ROL64_odd( d, v, 24)
+#define ROL64_48(d, v)   ROL64_even(d, v, 24)
+#define ROL64_49(d, v)   ROL64_odd( d, v, 25)
+#define ROL64_50(d, v)   ROL64_even(d, v, 25)
+#define ROL64_51(d, v)   ROL64_odd( d, v, 26)
+#define ROL64_52(d, v)   ROL64_even(d, v, 26)
+#define ROL64_53(d, v)   ROL64_odd( d, v, 27)
+#define ROL64_54(d, v)   ROL64_even(d, v, 27)
+#define ROL64_55(d, v)   ROL64_odd( d, v, 28)
+#define ROL64_56(d, v)   ROL64_even(d, v, 28)
+#define ROL64_57(d, v)   ROL64_odd( d, v, 29)
+#define ROL64_58(d, v)   ROL64_even(d, v, 29)
+#define ROL64_59(d, v)   ROL64_odd( d, v, 30)
+#define ROL64_60(d, v)   ROL64_even(d, v, 30)
+#define ROL64_61(d, v)   ROL64_odd( d, v, 31)
+#define ROL64_62(d, v)   ROL64_even(d, v, 31)
+#define ROL64_63(d, v)   ROL64_odd63(d, v)
+ 
+#else
+ 
+#define ROL64_small(d, v, n)   do { /* rotate the 64-bit value h:l left by n (used with n = 1..31), without interleaving */ \
+                sph_u32 tmp; \
+                tmp = SPH_T32(v ## l << n) | (v ## h >> (32 - n)); /* new low half; held in tmp so that d may be the same lane as v */ \
+                d ## h = SPH_T32(v ## h << n) | (v ## l >> (32 - n)); \
+                d ## l = tmp; \
+        } while (0)
+ 
+#define ROL64_0(d, v)    0 /* non-interleaved dispatch table: rotation by 0 expands to the constant 0 and leaves d untouched */
+#define ROL64_1(d, v)    ROL64_small(d, v, 1) /* k = 1..31 forward directly to ROL64_small */
+#define ROL64_2(d, v)    ROL64_small(d, v, 2)
+#define ROL64_3(d, v)    ROL64_small(d, v, 3)
+#define ROL64_4(d, v)    ROL64_small(d, v, 4)
+#define ROL64_5(d, v)    ROL64_small(d, v, 5)
+#define ROL64_6(d, v)    ROL64_small(d, v, 6)
+#define ROL64_7(d, v)    ROL64_small(d, v, 7)
+#define ROL64_8(d, v)    ROL64_small(d, v, 8)
+#define ROL64_9(d, v)    ROL64_small(d, v, 9)
+#define ROL64_10(d, v)   ROL64_small(d, v, 10)
+#define ROL64_11(d, v)   ROL64_small(d, v, 11)
+#define ROL64_12(d, v)   ROL64_small(d, v, 12)
+#define ROL64_13(d, v)   ROL64_small(d, v, 13)
+#define ROL64_14(d, v)   ROL64_small(d, v, 14)
+#define ROL64_15(d, v)   ROL64_small(d, v, 15)
+#define ROL64_16(d, v)   ROL64_small(d, v, 16)
+#define ROL64_17(d, v)   ROL64_small(d, v, 17)
+#define ROL64_18(d, v)   ROL64_small(d, v, 18)
+#define ROL64_19(d, v)   ROL64_small(d, v, 19)
+#define ROL64_20(d, v)   ROL64_small(d, v, 20)
+#define ROL64_21(d, v)   ROL64_small(d, v, 21)
+#define ROL64_22(d, v)   ROL64_small(d, v, 22)
+#define ROL64_23(d, v)   ROL64_small(d, v, 23)
+#define ROL64_24(d, v)   ROL64_small(d, v, 24)
+#define ROL64_25(d, v)   ROL64_small(d, v, 25)
+#define ROL64_26(d, v)   ROL64_small(d, v, 26)
+#define ROL64_27(d, v)   ROL64_small(d, v, 27)
+#define ROL64_28(d, v)   ROL64_small(d, v, 28)
+#define ROL64_29(d, v)   ROL64_small(d, v, 29)
+#define ROL64_30(d, v)   ROL64_small(d, v, 30)
+#define ROL64_31(d, v)   ROL64_small(d, v, 31)
+ 
+#define ROL64_32(d, v)   do { /* rotate by 32: swap the two halves */ \
+                sph_u32 tmp; \
+                tmp = v ## l; /* saved first so that d may be the same lane as v */ \
+                d ## l = v ## h; \
+                d ## h = tmp; \
+        } while (0)
+ 
+#define ROL64_big(d, v, n)   do { /* rotate left by 32 + n: rotate by n into a scratch pair, then store it with the halves swapped */ \
+                sph_u32 trl, trh; \
+                ROL64_small(tr, v, n); /* `tr` is pasted to trl / trh inside ROL64_small */ \
+                d ## h = trl; \
+                d ## l = trh; \
+        } while (0)
+ 
+#define ROL64_33(d, v)   ROL64_big(d, v, 1) /* k = 33..63 forward to ROL64_big with n = k - 32 */
+#define ROL64_34(d, v)   ROL64_big(d, v, 2)
+#define ROL64_35(d, v)   ROL64_big(d, v, 3)
+#define ROL64_36(d, v)   ROL64_big(d, v, 4)
+#define ROL64_37(d, v)   ROL64_big(d, v, 5)
+#define ROL64_38(d, v)   ROL64_big(d, v, 6)
+#define ROL64_39(d, v)   ROL64_big(d, v, 7)
+#define ROL64_40(d, v)   ROL64_big(d, v, 8)
+#define ROL64_41(d, v)   ROL64_big(d, v, 9)
+#define ROL64_42(d, v)   ROL64_big(d, v, 10)
+#define ROL64_43(d, v)   ROL64_big(d, v, 11)
+#define ROL64_44(d, v)   ROL64_big(d, v, 12)
+#define ROL64_45(d, v)   ROL64_big(d, v, 13)
+#define ROL64_46(d, v)   ROL64_big(d, v, 14)
+#define ROL64_47(d, v)   ROL64_big(d, v, 15)
+#define ROL64_48(d, v)   ROL64_big(d, v, 16)
+#define ROL64_49(d, v)   ROL64_big(d, v, 17)
+#define ROL64_50(d, v)   ROL64_big(d, v, 18)
+#define ROL64_51(d, v)   ROL64_big(d, v, 19)
+#define ROL64_52(d, v)   ROL64_big(d, v, 20)
+#define ROL64_53(d, v)   ROL64_big(d, v, 21)
+#define ROL64_54(d, v)   ROL64_big(d, v, 22)
+#define ROL64_55(d, v)   ROL64_big(d, v, 23)
+#define ROL64_56(d, v)   ROL64_big(d, v, 24)
+#define ROL64_57(d, v)   ROL64_big(d, v, 25)
+#define ROL64_58(d, v)   ROL64_big(d, v, 26)
+#define ROL64_59(d, v)   ROL64_big(d, v, 27)
+#define ROL64_60(d, v)   ROL64_big(d, v, 28)
+#define ROL64_61(d, v)   ROL64_big(d, v, 29)
+#define ROL64_62(d, v)   ROL64_big(d, v, 30)
+#define ROL64_63(d, v)   ROL64_big(d, v, 31)
+ 
+#endif
+ 
+#define XOR64_IOTA(d, s, k) /* d = s XOR k on split lanes; k is an object with .low / .high members, such as an RC[] entry */ \
+        (d ## l = s ## l ^ k.low, d ## h = s ## h ^ k.high)
+ 
+#endif
+ 
+#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4)   do { /* t = ROL64(d0^d1^d2^d3^d4, 1) ^ (c0^c1^c2^c3^c4) */ \
+                DECL64(tt0); \
+                DECL64(tt1); \
+                DECL64(tt2); \
+                DECL64(tt3); \
+                XOR64(tt0, d0, d1); \
+                XOR64(tt1, d2, d3); \
+                XOR64(tt0, tt0, d4); \
+                XOR64(tt0, tt0, tt1); /* tt0 = XOR of all five d lanes */ \
+                ROL64(tt0, tt0, 1); \
+                XOR64(tt2, c0, c1); \
+                XOR64(tt3, c2, c3); \
+                XOR64(tt0, tt0, c4); \
+                XOR64(tt2, tt2, tt3); /* tt2 = c0^c1^c2^c3; c4 was already folded into tt0 */ \
+                XOR64(t, tt0, tt2); \
+        } while (0)
+ 
+#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, /* theta step over 25 lanes bXY, updated in place */ \
+        b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
+        b40, b41, b42, b43, b44) \
+        do { \
+                DECL64(t0); \
+                DECL64(t1); \
+                DECL64(t2); \
+                DECL64(t3); \
+                DECL64(t4); \
+                TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); /* tX combines group X-1 (unrotated) with group X+1 (rotated by 1), indices mod 5 */ \
+                TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
+                TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
+                TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
+                TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
+                XOR64(b00, b00, t0); /* all five tX are computed before any lane is modified; then tX is XORed into bX0..bX4 */ \
+                XOR64(b01, b01, t0); \
+                XOR64(b02, b02, t0); \
+                XOR64(b03, b03, t0); \
+                XOR64(b04, b04, t0); \
+                XOR64(b10, b10, t1); \
+                XOR64(b11, b11, t1); \
+                XOR64(b12, b12, t1); \
+                XOR64(b13, b13, t1); \
+                XOR64(b14, b14, t1); \
+                XOR64(b20, b20, t2); \
+                XOR64(b21, b21, t2); \
+                XOR64(b22, b22, t2); \
+                XOR64(b23, b23, t2); \
+                XOR64(b24, b24, t2); \
+                XOR64(b30, b30, t3); \
+                XOR64(b31, b31, t3); \
+                XOR64(b32, b32, t3); \
+                XOR64(b33, b33, t3); \
+                XOR64(b34, b34, t3); \
+                XOR64(b40, b40, t4); \
+                XOR64(b41, b41, t4); \
+                XOR64(b42, b42, t4); \
+                XOR64(b43, b43, t4); \
+                XOR64(b44, b44, t4); \
+        } while (0)
+ 
+/*
+ * Rho step: rotate each of the 25 lanes in place by its fixed per-lane
+ * bit count using ROL64. The first lane (b00) has a rotation count of
+ * zero, so its rotation is left commented out.
+ */
+#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
+        b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
+        b40, b41, b42, b43, b44) \
+        do { \
+                /* ROL64(b00, b00,  0); */ \
+                ROL64(b01, b01, 36); \
+                ROL64(b02, b02,  3); \
+                ROL64(b03, b03, 41); \
+                ROL64(b04, b04, 18); \
+                ROL64(b10, b10,  1); \
+                ROL64(b11, b11, 44); \
+                ROL64(b12, b12, 10); \
+                ROL64(b13, b13, 45); \
+                ROL64(b14, b14,  2); \
+                ROL64(b20, b20, 62); \
+                ROL64(b21, b21,  6); \
+                ROL64(b22, b22, 43); \
+                ROL64(b23, b23, 15); \
+                ROL64(b24, b24, 61); \
+                ROL64(b30, b30, 28); \
+                ROL64(b31, b31, 55); \
+                ROL64(b32, b32, 25); \
+                ROL64(b33, b33, 21); \
+                ROL64(b34, b34, 56); \
+                ROL64(b40, b40, 27); \
+                ROL64(b41, b41, 20); \
+                ROL64(b42, b42, 39); \
+                ROL64(b43, b43,  8); \
+                ROL64(b44, b44, 14); \
+        } while (0)
+ 
+/*
+ * The KHI macro integrates the "lane complement" optimization. On input,
+ * some words are complemented:
+ *    a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
+ * On output, the following words are complemented:
+ *    a04 a10 a20 a22 a23 a31
+ *
+ * The (implicit) permutation and the theta expansion will bring back
+ * the input mask for the next round.
+ */
+ 
+/* d = a ^ (b | c), using a block-local temporary kt. */
+#define KHI_XO(d, a, b, c)   do { \
+                DECL64(kt); \
+                OR64(kt, b, c); \
+                XOR64(d, a, kt); \
+        } while (0)
+ 
+/* d = a ^ (b & c), using a block-local temporary kt. */
+#define KHI_XA(d, a, b, c)   do { \
+                DECL64(kt); \
+                AND64(kt, b, c); \
+                XOR64(d, a, kt); \
+        } while (0)
+ 
+/*
+ * Khi (chi) step with the "lane complement" optimization. The 25 lanes
+ * are processed as five sets sharing the same second index
+ * (b0y, b1y, b2y, b3y, b4y). For each set, the five new values c0..c4
+ * are computed from the old lanes first and only then copied back with
+ * MOV64, so every KHI_XO / KHI_XA reads unmodified inputs.
+ *
+ * bnn holds the complement (NOT64) of one lane per set; the mix of
+ * OR-based (KHI_XO) and AND-based (KHI_XA) combinations, together with
+ * where bnn is substituted, differs from set to set and must be kept
+ * exactly as written.
+ */
+#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
+        b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
+        b40, b41, b42, b43, b44) \
+        do { \
+                DECL64(c0); \
+                DECL64(c1); \
+                DECL64(c2); \
+                DECL64(c3); \
+                DECL64(c4); \
+                DECL64(bnn); \
+                NOT64(bnn, b20); \
+                KHI_XO(c0, b00, b10, b20); \
+                KHI_XO(c1, b10, bnn, b30); \
+                KHI_XA(c2, b20, b30, b40); \
+                KHI_XO(c3, b30, b40, b00); \
+                KHI_XA(c4, b40, b00, b10); \
+                MOV64(b00, c0); \
+                MOV64(b10, c1); \
+                MOV64(b20, c2); \
+                MOV64(b30, c3); \
+                MOV64(b40, c4); \
+                NOT64(bnn, b41); \
+                KHI_XO(c0, b01, b11, b21); \
+                KHI_XA(c1, b11, b21, b31); \
+                KHI_XO(c2, b21, b31, bnn); \
+                KHI_XO(c3, b31, b41, b01); \
+                KHI_XA(c4, b41, b01, b11); \
+                MOV64(b01, c0); \
+                MOV64(b11, c1); \
+                MOV64(b21, c2); \
+                MOV64(b31, c3); \
+                MOV64(b41, c4); \
+                NOT64(bnn, b32); \
+                KHI_XO(c0, b02, b12, b22); \
+                KHI_XA(c1, b12, b22, b32); \
+                KHI_XA(c2, b22, bnn, b42); \
+                KHI_XO(c3, bnn, b42, b02); \
+                KHI_XA(c4, b42, b02, b12); \
+                MOV64(b02, c0); \
+                MOV64(b12, c1); \
+                MOV64(b22, c2); \
+                MOV64(b32, c3); \
+                MOV64(b42, c4); \
+                NOT64(bnn, b33); \
+                KHI_XA(c0, b03, b13, b23); \
+                KHI_XO(c1, b13, b23, b33); \
+                KHI_XO(c2, b23, bnn, b43); \
+                KHI_XA(c3, bnn, b43, b03); \
+                KHI_XO(c4, b43, b03, b13); \
+                MOV64(b03, c0); \
+                MOV64(b13, c1); \
+                MOV64(b23, c2); \
+                MOV64(b33, c3); \
+                MOV64(b43, c4); \
+                NOT64(bnn, b14); \
+                KHI_XA(c0, b04, bnn, b24); \
+                KHI_XO(c1, bnn, b24, b34); \
+                KHI_XA(c2, b24, b34, b44); \
+                KHI_XO(c3, b34, b44, b04); \
+                KHI_XA(c4, b44, b04, b14); \
+                MOV64(b04, c0); \
+                MOV64(b14, c1); \
+                MOV64(b24, c2); \
+                MOV64(b34, c3); \
+                MOV64(b44, c4); \
+        } while (0)
+ 
+/* Iota step: XOR the round constant r into lane a00 (in place). */
+#define IOTA(r)   XOR64_IOTA(a00, a00, r)
+ 
+/*
+ * P0..P23: orderings of the 25 state variable names a00..a44. KF_ELT
+ * pastes a round index onto "P" and hands the resulting list to THETA,
+ * RHO and KHI as their 25 arguments. P0 is the natural order; a00 stays
+ * first in every ordering.
+ * NOTE(review): presumably each Pn encodes the lane permutation
+ * accumulated after n rounds, so that the permutation is performed by
+ * renaming rather than by moving data -- confirm against the sphlib
+ * documentation before editing any entry.
+ */
+#define P0    a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
+              a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
+#define P1    a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
+              a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
+#define P2    a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
+              a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
+#define P3    a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
+              a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
+#define P4    a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
+              a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
+#define P5    a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
+              a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
+#define P6    a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
+              a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
+#define P7    a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
+              a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
+#define P8    a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
+              a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
+#define P9    a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
+              a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
+#define P10   a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
+              a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
+#define P11   a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
+              a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
+#define P12   a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
+              a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
+#define P13   a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
+              a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
+#define P14   a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
+              a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
+#define P15   a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
+              a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
+#define P16   a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
+              a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
+#define P17   a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
+              a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
+#define P18   a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
+              a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
+#define P19   a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
+              a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
+#define P20   a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
+              a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
+#define P21   a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
+              a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
+#define P22   a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
+              a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
+#define P23   a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
+              a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
+ 
+/*
+ * Move lane values between the state variables after one KF_ELT(0, 1)
+ * round (used when SPH_KECCAK_UNROLL == 1). All 24 lanes other than a00
+ * are rotated through a single cycle, with t saving the first value so
+ * the chain can be closed at the end. The order of the moves matters:
+ * each destination must be written only after it has been read.
+ */
+#define P1_TO_P0   do { \
+                DECL64(t); \
+                MOV64(t, a01); \
+                MOV64(a01, a30); \
+                MOV64(a30, a33); \
+                MOV64(a33, a23); \
+                MOV64(a23, a12); \
+                MOV64(a12, a21); \
+                MOV64(a21, a02); \
+                MOV64(a02, a10); \
+                MOV64(a10, a11); \
+                MOV64(a11, a41); \
+                MOV64(a41, a24); \
+                MOV64(a24, a42); \
+                MOV64(a42, a04); \
+                MOV64(a04, a20); \
+                MOV64(a20, a22); \
+                MOV64(a22, a32); \
+                MOV64(a32, a43); \
+                MOV64(a43, a34); \
+                MOV64(a34, a03); \
+                MOV64(a03, a40); \
+                MOV64(a40, a44); \
+                MOV64(a44, a14); \
+                MOV64(a14, a31); \
+                MOV64(a31, a13); \
+                MOV64(a13, t); \
+        } while (0)
+ 
+/*
+ * Move lane values between the state variables after a two-round
+ * unrolled body (SPH_KECCAK_UNROLL == 2). The 24 lanes other than a00
+ * are rotated in two independent cycles of 12, each opened by saving
+ * one lane in t and closed by writing t back.
+ */
+#define P2_TO_P0   do { \
+                DECL64(t); \
+                MOV64(t, a01); \
+                MOV64(a01, a33); \
+                MOV64(a33, a12); \
+                MOV64(a12, a02); \
+                MOV64(a02, a11); \
+                MOV64(a11, a24); \
+                MOV64(a24, a04); \
+                MOV64(a04, a22); \
+                MOV64(a22, a43); \
+                MOV64(a43, a03); \
+                MOV64(a03, a44); \
+                MOV64(a44, a31); \
+                MOV64(a31, t); \
+                MOV64(t, a10); \
+                MOV64(a10, a41); \
+                MOV64(a41, a42); \
+                MOV64(a42, a20); \
+                MOV64(a20, a32); \
+                MOV64(a32, a34); \
+                MOV64(a34, a40); \
+                MOV64(a40, a14); \
+                MOV64(a14, a13); \
+                MOV64(a13, a30); \
+                MOV64(a30, a23); \
+                MOV64(a23, a21); \
+                MOV64(a21, t); \
+        } while (0)
+ 
+/*
+ * Move lane values between the state variables after a four-round
+ * unrolled body (SPH_KECCAK_UNROLL == 4). The 24 lanes other than a00
+ * are rotated in four independent cycles of 6, each opened by saving
+ * one lane in t and closed by writing t back.
+ */
+#define P4_TO_P0   do { \
+                DECL64(t); \
+                MOV64(t, a01); \
+                MOV64(a01, a12); \
+                MOV64(a12, a11); \
+                MOV64(a11, a04); \
+                MOV64(a04, a43); \
+                MOV64(a43, a44); \
+                MOV64(a44, t); \
+                MOV64(t, a02); \
+                MOV64(a02, a24); \
+                MOV64(a24, a22); \
+                MOV64(a22, a03); \
+                MOV64(a03, a31); \
+                MOV64(a31, a33); \
+                MOV64(a33, t); \
+                MOV64(t, a10); \
+                MOV64(a10, a42); \
+                MOV64(a42, a32); \
+                MOV64(a32, a40); \
+                MOV64(a40, a13); \
+                MOV64(a13, a23); \
+                MOV64(a23, t); \
+                MOV64(t, a14); \
+                MOV64(a14, a30); \
+                MOV64(a30, a21); \
+                MOV64(a21, a41); \
+                MOV64(a41, a20); \
+                MOV64(a20, a34); \
+                MOV64(a34, t); \
+        } while (0)
+ 
+/*
+ * Move lane values between the state variables after a six-round
+ * unrolled body (SPH_KECCAK_UNROLL == 6). The 24 lanes other than a00
+ * are rotated in six independent cycles of 4, each opened by saving
+ * one lane in t and closed by writing t back.
+ */
+#define P6_TO_P0   do { \
+                DECL64(t); \
+                MOV64(t, a01); \
+                MOV64(a01, a02); \
+                MOV64(a02, a04); \
+                MOV64(a04, a03); \
+                MOV64(a03, t); \
+                MOV64(t, a10); \
+                MOV64(a10, a20); \
+                MOV64(a20, a40); \
+                MOV64(a40, a30); \
+                MOV64(a30, t); \
+                MOV64(t, a11); \
+                MOV64(a11, a22); \
+                MOV64(a22, a44); \
+                MOV64(a44, a33); \
+                MOV64(a33, t); \
+                MOV64(t, a12); \
+                MOV64(a12, a24); \
+                MOV64(a24, a43); \
+                MOV64(a43, a31); \
+                MOV64(a31, t); \
+                MOV64(t, a13); \
+                MOV64(a13, a21); \
+                MOV64(a21, a42); \
+                MOV64(a42, a34); \
+                MOV64(a34, t); \
+                MOV64(t, a14); \
+                MOV64(a14, a23); \
+                MOV64(a23, a41); \
+                MOV64(a41, a32); \
+                MOV64(a32, t); \
+        } while (0)
+ 
+/*
+ * Move lane values between the state variables after an eight-round
+ * unrolled body (SPH_KECCAK_UNROLL == 8). The 24 lanes other than a00
+ * are rotated in eight independent cycles of 3, each opened by saving
+ * one lane in t and closed by writing t back.
+ */
+#define P8_TO_P0   do { \
+                DECL64(t); \
+                MOV64(t, a01); \
+                MOV64(a01, a11); \
+                MOV64(a11, a43); \
+                MOV64(a43, t); \
+                MOV64(t, a02); \
+                MOV64(a02, a22); \
+                MOV64(a22, a31); \
+                MOV64(a31, t); \
+                MOV64(t, a03); \
+                MOV64(a03, a33); \
+                MOV64(a33, a24); \
+                MOV64(a24, t); \
+                MOV64(t, a04); \
+                MOV64(a04, a44); \
+                MOV64(a44, a12); \
+                MOV64(a12, t); \
+                MOV64(t, a10); \
+                MOV64(a10, a32); \
+                MOV64(a32, a13); \
+                MOV64(a13, t); \
+                MOV64(t, a14); \
+                MOV64(a14, a21); \
+                MOV64(a21, a20); \
+                MOV64(a20, t); \
+                MOV64(t, a23); \
+                MOV64(a23, a42); \
+                MOV64(a42, a40); \
+                MOV64(a40, t); \
+                MOV64(t, a30); \
+                MOV64(a30, a41); \
+                MOV64(a41, a34); \
+                MOV64(a34, t); \
+        } while (0)
+ 
+/*
+ * Move lane values between the state variables after a twelve-round
+ * unrolled body (SPH_KECCAK_UNROLL == 12). The 24 lanes other than a00
+ * are exchanged as twelve independent pairwise swaps through t.
+ */
+#define P12_TO_P0   do { \
+                DECL64(t); \
+                MOV64(t, a01); \
+                MOV64(a01, a04); \
+                MOV64(a04, t); \
+                MOV64(t, a02); \
+                MOV64(a02, a03); \
+                MOV64(a03, t); \
+                MOV64(t, a10); \
+                MOV64(a10, a40); \
+                MOV64(a40, t); \
+                MOV64(t, a11); \
+                MOV64(a11, a44); \
+                MOV64(a44, t); \
+                MOV64(t, a12); \
+                MOV64(a12, a43); \
+                MOV64(a43, t); \
+                MOV64(t, a13); \
+                MOV64(a13, a42); \
+                MOV64(a42, t); \
+                MOV64(t, a14); \
+                MOV64(a14, a41); \
+                MOV64(a41, t); \
+                MOV64(t, a20); \
+                MOV64(a20, a30); \
+                MOV64(a30, t); \
+                MOV64(t, a21); \
+                MOV64(a21, a34); \
+                MOV64(a34, t); \
+                MOV64(t, a22); \
+                MOV64(a22, a33); \
+                MOV64(a33, t); \
+                MOV64(t, a23); \
+                MOV64(a23, a32); \
+                MOV64(a32, t); \
+                MOV64(t, a24); \
+                MOV64(a24, a31); \
+                MOV64(a31, t); \
+        } while (0)
+ 
+/*
+ * Parentheses spelled as macros. KF_ELT writes "THETA LPAR P ## r RPAR"
+ * so that the P<n> list is expanded into 25 separate arguments before
+ * THETA / RHO / KHI are recognized as function-like macro invocations
+ * on a later rescan.
+ */
+#define LPAR   (
+#define RPAR   )
+ 
+/*
+ * One Keccak-f round: THETA and RHO are applied with the lanes in
+ * ordering P<r>, KHI with the lanes in ordering P<s>, then IOTA XORs
+ * the round constant k into a00. r and s must be literal indices for
+ * which P<r> / P<s> macros exist, since they are token-pasted onto "P".
+ */
+#define KF_ELT(r, s, k)   do { \
+                THETA LPAR P ## r RPAR; \
+                RHO LPAR P ## r RPAR; \
+                KHI LPAR P ## s RPAR; \
+                IOTA(k); \
+        } while (0)
+ 
+/*
+ * KECCAK_F_1600 is the entry point used by keccak_core(); it expands
+ * to the unroll-specific KECCAK_F_1600_ body wrapped in DO().
+ * NOTE(review): the DO() indirection appears to exist so the body is
+ * passed as a macro argument and rescanned, which lets the
+ * "THETA LPAR ... RPAR" sequences in KF_ELT become real macro calls --
+ * confirm before simplifying.
+ */
+#define DO(x)   x
+ 
+#define KECCAK_F_1600   DO(KECCAK_F_1600_)
+ 
+/*
+ * KECCAK_F_1600_: the 24-round permutation body, selected by
+ * SPH_KECCAK_UNROLL.
+ *
+ * For unroll counts 1, 2, 4, 6, 8 and 12 the rounds run in a loop whose
+ * body contains that many KF_ELT rounds (round i of the body uses
+ * orderings P<i> and P<i+1> and constant RC[j + i]) followed by the
+ * matching Pn_TO_P0 macro before the next iteration starts again from
+ * P0. An unroll count of 0 means fully unrolled: all 24 rounds are
+ * written out, the last one uses KHI ordering P0, and no Pn_TO_P0 step
+ * is used. Any other value is a compile-time error.
+ *
+ * The loop variants declare their own "int j" inside the do/while
+ * block.
+ */
+#if SPH_KECCAK_UNROLL == 1
+ 
+#define KECCAK_F_1600_   do { \
+                int j; \
+                for (j = 0; j < 24; j ++) { \
+                        KF_ELT( 0,  1, RC[j + 0]); \
+                        P1_TO_P0; \
+                } \
+        } while (0)
+ 
+#elif SPH_KECCAK_UNROLL == 2
+ 
+#define KECCAK_F_1600_   do { \
+                int j; \
+                for (j = 0; j < 24; j += 2) { \
+                        KF_ELT( 0,  1, RC[j + 0]); \
+                        KF_ELT( 1,  2, RC[j + 1]); \
+                        P2_TO_P0; \
+                } \
+        } while (0)
+ 
+#elif SPH_KECCAK_UNROLL == 4
+ 
+#define KECCAK_F_1600_   do { \
+                int j; \
+                for (j = 0; j < 24; j += 4) { \
+                        KF_ELT( 0,  1, RC[j + 0]); \
+                        KF_ELT( 1,  2, RC[j + 1]); \
+                        KF_ELT( 2,  3, RC[j + 2]); \
+                        KF_ELT( 3,  4, RC[j + 3]); \
+                        P4_TO_P0; \
+                } \
+        } while (0)
+ 
+#elif SPH_KECCAK_UNROLL == 6
+ 
+#define KECCAK_F_1600_   do { \
+                int j; \
+                for (j = 0; j < 24; j += 6) { \
+                        KF_ELT( 0,  1, RC[j + 0]); \
+                        KF_ELT( 1,  2, RC[j + 1]); \
+                        KF_ELT( 2,  3, RC[j + 2]); \
+                        KF_ELT( 3,  4, RC[j + 3]); \
+                        KF_ELT( 4,  5, RC[j + 4]); \
+                        KF_ELT( 5,  6, RC[j + 5]); \
+                        P6_TO_P0; \
+                } \
+        } while (0)
+ 
+#elif SPH_KECCAK_UNROLL == 8
+ 
+#define KECCAK_F_1600_   do { \
+                int j; \
+                for (j = 0; j < 24; j += 8) { \
+                        KF_ELT( 0,  1, RC[j + 0]); \
+                        KF_ELT( 1,  2, RC[j + 1]); \
+                        KF_ELT( 2,  3, RC[j + 2]); \
+                        KF_ELT( 3,  4, RC[j + 3]); \
+                        KF_ELT( 4,  5, RC[j + 4]); \
+                        KF_ELT( 5,  6, RC[j + 5]); \
+                        KF_ELT( 6,  7, RC[j + 6]); \
+                        KF_ELT( 7,  8, RC[j + 7]); \
+                        P8_TO_P0; \
+                } \
+        } while (0)
+ 
+#elif SPH_KECCAK_UNROLL == 12
+ 
+#define KECCAK_F_1600_   do { \
+                int j; \
+                for (j = 0; j < 24; j += 12) { \
+                        KF_ELT( 0,  1, RC[j +  0]); \
+                        KF_ELT( 1,  2, RC[j +  1]); \
+                        KF_ELT( 2,  3, RC[j +  2]); \
+                        KF_ELT( 3,  4, RC[j +  3]); \
+                        KF_ELT( 4,  5, RC[j +  4]); \
+                        KF_ELT( 5,  6, RC[j +  5]); \
+                        KF_ELT( 6,  7, RC[j +  6]); \
+                        KF_ELT( 7,  8, RC[j +  7]); \
+                        KF_ELT( 8,  9, RC[j +  8]); \
+                        KF_ELT( 9, 10, RC[j +  9]); \
+                        KF_ELT(10, 11, RC[j + 10]); \
+                        KF_ELT(11, 12, RC[j + 11]); \
+                        P12_TO_P0; \
+                } \
+        } while (0)
+ 
+#elif SPH_KECCAK_UNROLL == 0
+ 
+#define KECCAK_F_1600_   do { \
+                KF_ELT( 0,  1, RC[ 0]); \
+                KF_ELT( 1,  2, RC[ 1]); \
+                KF_ELT( 2,  3, RC[ 2]); \
+                KF_ELT( 3,  4, RC[ 3]); \
+                KF_ELT( 4,  5, RC[ 4]); \
+                KF_ELT( 5,  6, RC[ 5]); \
+                KF_ELT( 6,  7, RC[ 6]); \
+                KF_ELT( 7,  8, RC[ 7]); \
+                KF_ELT( 8,  9, RC[ 8]); \
+                KF_ELT( 9, 10, RC[ 9]); \
+                KF_ELT(10, 11, RC[10]); \
+                KF_ELT(11, 12, RC[11]); \
+                KF_ELT(12, 13, RC[12]); \
+                KF_ELT(13, 14, RC[13]); \
+                KF_ELT(14, 15, RC[14]); \
+                KF_ELT(15, 16, RC[15]); \
+                KF_ELT(16, 17, RC[16]); \
+                KF_ELT(17, 18, RC[17]); \
+                KF_ELT(18, 19, RC[18]); \
+                KF_ELT(19, 20, RC[19]); \
+                KF_ELT(20, 21, RC[20]); \
+                KF_ELT(21, 22, RC[21]); \
+                KF_ELT(22, 23, RC[22]); \
+                KF_ELT(23,  0, RC[23]); \
+        } while (0)
+ 
+#else
+ 
+#error Unimplemented unroll count for Keccak.
+ 
+#endif
+ 
+/*
+ * Reset a Keccak context for a digest of out_size bits.
+ *
+ * The whole state is zeroed, then the six 64-bit lanes listed in
+ * `flipped` are set to all-ones as the starting point of the "lane
+ * complement" representation. In the 32-bit layout each 64-bit lane k
+ * occupies narrow[2k] and narrow[2k + 1]; both halves are all-ones, so
+ * interleaving (if applicable) makes no difference.
+ *
+ * kcv       pointer to a sph_keccak_context
+ * out_size  digest size in bits; the block length stored in kc->lim is
+ *           200 - out_size / 4 bytes
+ */
+static void
+keccak_init(void *kcv, unsigned out_size)
+{
+        static const unsigned char flipped[] = { 1, 2, 8, 12, 17, 20 };
+        sph_keccak_context *kc = (sph_keccak_context *)kcv;
+        size_t k;
+ 
+#if SPH_KECCAK_64
+        for (k = 0; k < 25; k ++)
+                kc->u.wide[k] = 0;
+        for (k = 0; k < sizeof flipped / sizeof flipped[0]; k ++)
+                kc->u.wide[flipped[k]] = SPH_C64(0xFFFFFFFFFFFFFFFF);
+#else
+        for (k = 0; k < 50; k ++)
+                kc->u.narrow[k] = 0;
+        for (k = 0; k < sizeof flipped / sizeof flipped[0]; k ++) {
+                kc->u.narrow[2 * flipped[k] + 0] = SPH_C32(0xFFFFFFFF);
+                kc->u.narrow[2 * flipped[k] + 1] = SPH_C32(0xFFFFFFFF);
+        }
+#endif
+        kc->ptr = 0;
+        kc->lim = 200 - (out_size >> 2);
+}
+ 
+/*
+ * Absorb len bytes from data into the context, with a block length of
+ * lim bytes.
+ *
+ * Input is staged in kc->buf. If the new bytes leave the buffer short of
+ * a full block they are only copied and the state is not touched.
+ * Otherwise the state is loaded (READ_STATE), and every time the buffer
+ * reaches lim bytes it is folded into the state (INPUT_BUF) and
+ * permuted (KECCAK_F_1600); the state is stored back (WRITE_STATE) when
+ * all input has been consumed.
+ *
+ * The locals kc, buf and ptr keep their names on purpose: the state
+ * macros are defined elsewhere in the file and may refer to them.
+ */
+static void
+keccak_core(void *kcv, const void *data, size_t len, size_t lim)
+{
+        sph_keccak_context *kc = (sph_keccak_context *)kcv;
+        unsigned char *buf = kc->buf;
+        size_t ptr = kc->ptr;
+        const unsigned char *in = (const unsigned char *)data;
+        DECL_STATE
+ 
+        /* Not enough to complete a block: just buffer the bytes. */
+        if ((lim - ptr) > len) {
+                memcpy(buf + ptr, in, len);
+                kc->ptr = ptr + len;
+                return;
+        }
+ 
+        READ_STATE(kc);
+        while (len != 0) {
+                size_t room = lim - ptr;
+                size_t take = (room > len) ? len : room;
+ 
+                memcpy(buf + ptr, in, take);
+                in += take;
+                len -= take;
+                ptr += take;
+                if (ptr != lim)
+                        continue;
+                /* Buffer holds a full block: absorb it and permute. */
+                INPUT_BUF(lim);
+                KECCAK_F_1600;
+                ptr = 0;
+        }
+        WRITE_STATE(kc);
+        kc->ptr = ptr;
+}
+ 
+#if SPH_KECCAK_64
+ 
+/*
+ * DEFCLOSE(d, lim) -- 64-bit state variant. Defines
+ *    static void keccak_close<d>(void *kcv, unsigned ub, unsigned n,
+ *                                void *dst)
+ * for a digest of d bytes and a block length of lim bytes.
+ *
+ * The generated function:
+ *  - builds eb: the top n bits of ub in its low n bits, with a single
+ *    1 bit just above them;
+ *  - builds the padding in u.tmp: eb first, zero bytes, and 0x80 as
+ *    the last byte of the block. When only one byte of the block is
+ *    free (kc->ptr == lim - 1) eb and 0x80 are merged into that byte,
+ *    except for n == 7 where eb already uses bit 7, so a whole extra
+ *    block is emitted (hence tmp holds lim + 1 bytes);
+ *  - feeds the padding through keccak_core();
+ *  - inverts the six lanes that keccak_init() set to all-ones, undoing
+ *    the "lane complement" representation;
+ *  - writes the leading state words into u.tmp with
+ *    sph_enc64le_aligned and copies the first d bytes to dst;
+ *  - re-initializes the context for the same digest size (d * 8 bits).
+ */
+#define DEFCLOSE(d, lim) \
+        static void keccak_close ## d( \
+                void *kcv, unsigned ub, unsigned n, void *dst) \
+        { \
+                sph_keccak_context* kc = (sph_keccak_context*)kcv; \
+                unsigned eb; \
+                union { \
+                        unsigned char tmp[lim + 1]; \
+                        sph_u64 dummy;   /* for alignment */ \
+                } u; \
+                size_t j; \
+ \
+                eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
+                if (kc->ptr == (lim - 1)) { \
+                        if (n == 7) { \
+                                u.tmp[0] = eb; \
+                                memset(u.tmp + 1, 0, lim - 1); \
+                                u.tmp[lim] = 0x80; \
+                                j = 1 + lim; \
+                        } else { \
+                                u.tmp[0] = eb | 0x80; \
+                                j = 1; \
+                        } \
+                } else { \
+                        j = lim - kc->ptr; \
+                        u.tmp[0] = eb; \
+                        memset(u.tmp + 1, 0, j - 2); \
+                        u.tmp[j - 1] = 0x80; \
+                } \
+                keccak_core(kc, u.tmp, j, lim); \
+                /* Finalize the "lane complement" */ \
+                kc->u.wide[ 1] = ~kc->u.wide[ 1]; \
+                kc->u.wide[ 2] = ~kc->u.wide[ 2]; \
+                kc->u.wide[ 8] = ~kc->u.wide[ 8]; \
+                kc->u.wide[12] = ~kc->u.wide[12]; \
+                kc->u.wide[17] = ~kc->u.wide[17]; \
+                kc->u.wide[20] = ~kc->u.wide[20]; \
+                for (j = 0; j < d; j += 8) \
+                        sph_enc64le_aligned(u.tmp + j, kc->u.wide[j >> 3]); \
+                memcpy(dst, u.tmp, d); \
+                keccak_init(kc, (unsigned)d << 3); \
+        } \
+ 
+#else
+ 
+/*
+ * DEFCLOSE(d, lim) -- 32-bit state variant. Defines
+ *    static void keccak_close<d>(void *kcv, unsigned ub, unsigned n,
+ *                                void *dst)
+ * for a digest of d bytes and a block length of lim bytes.
+ *
+ * The padding logic is the same as in the 64-bit variant: eb carries
+ * the top n bits of ub with a 1 bit just above them, the block is
+ * zero-filled and ends with 0x80, and an extra block is needed only
+ * when a single byte is free and n == 7. After keccak_core() has
+ * absorbed the padding, the twelve 32-bit words that keccak_init() set
+ * to all-ones are inverted, every word pair is passed through
+ * UNINTERLEAVE, the leading words are written into u.tmp with
+ * sph_enc32le_aligned, the first d bytes are copied to dst, and the
+ * context is re-initialized for the same digest size (d * 8 bits).
+ */
+#define DEFCLOSE(d, lim) \
+        static void keccak_close ## d( \
+                void *kcv, unsigned ub, unsigned n, void *dst) \
+        { \
+                sph_keccak_context* kc = (sph_keccak_context*)kcv; \
+                unsigned eb; \
+                union { \
+                        unsigned char tmp[lim + 1]; \
+                        sph_u64 dummy;   /* for alignment */ \
+                } u; \
+                size_t j; \
+ \
+                eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
+                if (kc->ptr == (lim - 1)) { \
+                        if (n == 7) { \
+                                u.tmp[0] = eb; \
+                                memset(u.tmp + 1, 0, lim - 1); \
+                                u.tmp[lim] = 0x80; \
+                                j = 1 + lim; \
+                        } else { \
+                                u.tmp[0] = eb | 0x80; \
+                                j = 1; \
+                        } \
+                } else { \
+                        j = lim - kc->ptr; \
+                        u.tmp[0] = eb; \
+                        memset(u.tmp + 1, 0, j - 2); \
+                        u.tmp[j - 1] = 0x80; \
+                } \
+                keccak_core(kc, u.tmp, j, lim); \
+                /* Finalize the "lane complement" */ \
+                kc->u.narrow[ 2] = ~kc->u.narrow[ 2]; \
+                kc->u.narrow[ 3] = ~kc->u.narrow[ 3]; \
+                kc->u.narrow[ 4] = ~kc->u.narrow[ 4]; \
+                kc->u.narrow[ 5] = ~kc->u.narrow[ 5]; \
+                kc->u.narrow[16] = ~kc->u.narrow[16]; \
+                kc->u.narrow[17] = ~kc->u.narrow[17]; \
+                kc->u.narrow[24] = ~kc->u.narrow[24]; \
+                kc->u.narrow[25] = ~kc->u.narrow[25]; \
+                kc->u.narrow[34] = ~kc->u.narrow[34]; \
+                kc->u.narrow[35] = ~kc->u.narrow[35]; \
+                kc->u.narrow[40] = ~kc->u.narrow[40]; \
+                kc->u.narrow[41] = ~kc->u.narrow[41]; \
+                /* un-interleave */ \
+                for (j = 0; j < 50; j += 2) \
+                        UNINTERLEAVE(kc->u.narrow[j], kc->u.narrow[j + 1]); \
+                for (j = 0; j < d; j += 4) \
+                        sph_enc32le_aligned(u.tmp + j, kc->u.narrow[j >> 2]); \
+                memcpy(dst, u.tmp, d); \
+                keccak_init(kc, (unsigned)d << 3); \
+        } \
+ 
+#endif
+ 
+/*
+ * Instantiate keccak_close28 / 32 / 48 / 64. The first argument is the
+ * digest length in bytes, the second the block length in bytes
+ * (200 minus twice the digest length, the same value keccak_init()
+ * stores in kc->lim).
+ */
+DEFCLOSE(28, 144)
+DEFCLOSE(32, 136)
+DEFCLOSE(48, 104)
+DEFCLOSE(64, 72)
+ 
+/*
+ * Initialize cc for Keccak-224 by calling keccak_init() with a 224-bit
+ * output size. See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak224_init(void *cc)
+{
+        keccak_init(cc, 224);
+}
+ 
+/*
+ * Absorb len bytes of data into a Keccak-224 context. 144 is the block
+ * length in bytes for this digest size (200 - 224 / 4).
+ * See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak224(void *cc, const void *data, size_t len)
+{
+        keccak_core(cc, data, len, 144);
+}
+ 
+/*
+ * Finish a Keccak-224 computation with no extra bits: same as
+ * sph_keccak224_addbits_and_close(cc, 0, 0, dst).
+ * See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak224_close(void *cc, void *dst)
+{
+        sph_keccak224_addbits_and_close(cc, 0, 0, dst);
+}
+ 
+/*
+ * Append the top n bits of ub, pad, and finish a Keccak-224
+ * computation. Delegates to keccak_close28() (generated by
+ * DEFCLOSE(28, 144)), which copies 28 bytes to dst and re-initializes
+ * the context. See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+        keccak_close28(cc, ub, n, dst);
+}
+ 
+/*
+ * Initialize cc for Keccak-256 by calling keccak_init() with a 256-bit
+ * output size. See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak256_init(void *cc)
+{
+        keccak_init(cc, 256);
+}
+ 
+/*
+ * Absorb len bytes of data into a Keccak-256 context. 136 is the block
+ * length in bytes for this digest size (200 - 256 / 4).
+ * See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak256(void *cc, const void *data, size_t len)
+{
+        keccak_core(cc, data, len, 136);
+}
+ 
+/*
+ * Finish a Keccak-256 computation with no extra bits: same as
+ * sph_keccak256_addbits_and_close(cc, 0, 0, dst).
+ * See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak256_close(void *cc, void *dst)
+{
+        sph_keccak256_addbits_and_close(cc, 0, 0, dst);
+}
+ 
+/*
+ * Append the top n bits of ub, pad, and finish a Keccak-256
+ * computation. Delegates to keccak_close32() (generated by
+ * DEFCLOSE(32, 136)), which copies 32 bytes to dst and re-initializes
+ * the context. See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+        keccak_close32(cc, ub, n, dst);
+}
+ 
+/*
+ * Initialize cc for Keccak-384 by calling keccak_init() with a 384-bit
+ * output size. See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak384_init(void *cc)
+{
+        keccak_init(cc, 384);
+}
+ 
+/*
+ * Absorb len bytes of data into a Keccak-384 context. 104 is the block
+ * length in bytes for this digest size (200 - 384 / 4).
+ * See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak384(void *cc, const void *data, size_t len)
+{
+        keccak_core(cc, data, len, 104);
+}
+ 
+/*
+ * Finish a Keccak-384 computation with no extra bits: same as
+ * sph_keccak384_addbits_and_close(cc, 0, 0, dst).
+ * See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak384_close(void *cc, void *dst)
+{
+        sph_keccak384_addbits_and_close(cc, 0, 0, dst);
+}
+ 
+/*
+ * Append the top n bits of ub, pad, and finish a Keccak-384
+ * computation. Delegates to keccak_close48() (generated by
+ * DEFCLOSE(48, 104)), which copies 48 bytes to dst and re-initializes
+ * the context. See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+        keccak_close48(cc, ub, n, dst);
+}
+ 
+/*
+ * Initialize cc for Keccak-512 by calling keccak_init() with a 512-bit
+ * output size. See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak512_init(void *cc)
+{
+        keccak_init(cc, 512);
+}
+ 
+/*
+ * Absorb len bytes of data into a Keccak-512 context. 72 is the block
+ * length in bytes for this digest size (200 - 512 / 4).
+ * See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak512(void *cc, const void *data, size_t len)
+{
+        keccak_core(cc, data, len, 72);
+}
+ 
+/*
+ * Finish a Keccak-512 computation with no extra bits: same as
+ * sph_keccak512_addbits_and_close(cc, 0, 0, dst).
+ * See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak512_close(void *cc, void *dst)
+{
+        sph_keccak512_addbits_and_close(cc, 0, 0, dst);
+}
+ 
+/*
+ * Append the top n bits of ub, pad, and finish a Keccak-512
+ * computation. Delegates to keccak_close64() (generated by
+ * DEFCLOSE(64, 72)), which copies 64 bytes to dst and re-initializes
+ * the context. See sph_keccak.h for the public contract.
+ */
+void
+sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+        keccak_close64(cc, ub, n, dst);
+}
+ 
+ 
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/crypto/sph_keccak.h b/src/crypto/sph_keccak.h
new file mode 100644
index 0000000000000..bdafdb88db020
--- /dev/null
+++ b/src/crypto/sph_keccak.h
@@ -0,0 +1,293 @@
+/* $Id: sph_keccak.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * Keccak interface. This is the interface for Keccak with the
+ * recommended parameters for SHA-3, with output lengths 224, 256,
+ * 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_keccak.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_KECCAK_H__
+#define SPH_KECCAK_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for Keccak-224.
+ */
+#define SPH_SIZE_keccak224   224
+
+/**
+ * Output size (in bits) for Keccak-256.
+ */
+#define SPH_SIZE_keccak256   256
+
+/**
+ * Output size (in bits) for Keccak-384.
+ */
+#define SPH_SIZE_keccak384   384
+
+/**
+ * Output size (in bits) for Keccak-512.
+ */
+#define SPH_SIZE_keccak512   512
+
+/**
+ * This structure is a context for Keccak computations: it contains the
+ * intermediate values and some data from the last entered block. Once a
+ * Keccak computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running Keccak computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[144];    /* first field, for alignment */
+	size_t ptr, lim;           /* NOTE(review): presumably fill offset into buf and block length -- confirm in keccak.c */
+	union {                    /* two views of the same state storage */
+#if SPH_64
+		sph_u64 wide[25];  /* 25 sph_u64 words; only present when SPH_64 is set */
+#endif
+		sph_u32 narrow[50]; /* 50 sph_u32 words; always present */
+	} u;
+#endif
+} sph_keccak_context;
+
+/**
+ * Type for a Keccak-224 context (identical to the common context).
+ */
+typedef sph_keccak_context sph_keccak224_context;
+
+/**
+ * Type for a Keccak-256 context (identical to the common context).
+ */
+typedef sph_keccak_context sph_keccak256_context;
+
+/**
+ * Type for a Keccak-384 context (identical to the common context).
+ */
+typedef sph_keccak_context sph_keccak384_context;
+
+/**
+ * Type for a Keccak-512 context (identical to the common context).
+ */
+typedef sph_keccak_context sph_keccak512_context;
+
+/**
+ * Initialize a Keccak-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Keccak-224 context (pointer to a
+ *             <code>sph_keccak224_context</code>)
+ */
+void sph_keccak224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Keccak-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_keccak224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Keccak-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Keccak-224 context
+ * @param dst   the destination buffer
+ */
+void sph_keccak224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 down to 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Keccak-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_keccak224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Keccak-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Keccak-256 context (pointer to a
+ *             <code>sph_keccak256_context</code>)
+ */
+void sph_keccak256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Keccak-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_keccak256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Keccak-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Keccak-256 context
+ * @param dst   the destination buffer
+ */
+void sph_keccak256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 down to 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Keccak-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_keccak256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Keccak-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Keccak-384 context (pointer to a
+ *             <code>sph_keccak384_context</code>)
+ */
+void sph_keccak384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Keccak-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_keccak384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Keccak-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Keccak-384 context
+ * @param dst   the destination buffer
+ */
+void sph_keccak384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 down to 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Keccak-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_keccak384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Keccak-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Keccak-512 context (pointer to a
+ *             <code>sph_keccak512_context</code>)
+ */
+void sph_keccak512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Keccak-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_keccak512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Keccak-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Keccak-512 context
+ * @param dst   the destination buffer
+ */
+void sph_keccak512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 down to 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Keccak-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_keccak512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/crypto/sph_types.h b/src/crypto/sph_types.h
new file mode 100644
index 0000000000000..7295b0b37097a
--- /dev/null
+++ b/src/crypto/sph_types.h
@@ -0,0 +1,1976 @@
+/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */
+/**
+ * Basic type definitions.
+ *
+ * This header file defines the generic integer types that will be used
+ * for the implementation of hash functions; it also contains helper
+ * functions which encode and decode multi-byte integer values, using
+ * either little-endian or big-endian conventions.
+ *
+ * This file contains a compile-time test on the size of a byte
+ * (the <code>unsigned char</code> C type). If bytes are not octets,
+ * i.e. if they do not have a size of exactly 8 bits, then compilation
+ * is aborted. Architectures where bytes are not octets are relatively
+ * rare, even in the embedded devices market. We forbid non-octet bytes
+ * because there is no clear convention on how octet streams are encoded
+ * on such systems.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_types.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_TYPES_H__
+#define SPH_TYPES_H__
+
+#include <limits.h>
+
+/*
+ * All our I/O functions are defined over octet streams. We do not know
+ * how to handle input data if bytes are not octets.
+ */
+#if CHAR_BIT != 8
+#error This code requires 8-bit bytes
+#endif
+
+/* ============= BEGIN documentation block for Doxygen ============ */
+
+#ifdef DOXYGEN_IGNORE
+
+/** @mainpage sphlib C code documentation
+ *
+ * @section overview Overview
+ *
+ * <code>sphlib</code> is a library which contains implementations of
+ * various cryptographic hash functions. These pages have been generated
+ * with <a href="http://www.doxygen.org/index.html">doxygen</a> and
+ * document the API for the C implementations.
+ *
+ * The API is described in appropriate header files, which are available
+ * in the "Files" section. Each hash function family has its own header,
+ * whose name begins with <code>"sph_"</code> and contains the family
+ * name. For instance, the API for the RIPEMD hash functions is available
+ * in the header file <code>sph_ripemd.h</code>.
+ *
+ * @section principles API structure and conventions
+ *
+ * @subsection io Input/output conventions
+ *
+ * In all generality, hash functions operate over strings of bits.
+ * Individual bits are rarely encountered in C programming or actual
+ * communication protocols; most protocols converge on the ubiquitous
+ * "octet" which is a group of eight bits. Data is thus expressed as a
+ * stream of octets. The C programming language contains the notion of a
+ * "byte", which is a data unit managed under the type <code>"unsigned
+ * char"</code>. The C standard prescribes that a byte should hold at
+ * least eight bits, but possibly more. Most modern architectures, even
+ * in the embedded world, feature eight-bit bytes, i.e. map bytes to
+ * octets.
+ *
+ * Nevertheless, for some of the implemented hash functions, an extra
+ * API has been added, which allows the input of arbitrary sequences of
+ * bits: when the computation is about to be closed, 1 to 7 extra bits
+ * can be added. The functions for which this API is implemented include
+ * the SHA-2 functions and all SHA-3 candidates.
+ *
+ * <code>sphlib</code> defines hash functions which may hash octet streams,
+ * i.e. streams of bits where the number of bits is a multiple of eight.
+ * The data input functions in the <code>sphlib</code> API expect data
+ * as anonymous pointers (<code>"const void *"</code>) with a length
+ * (of type <code>"size_t"</code>) which gives the input data chunk length
+ * in bytes. A byte is assumed to be an octet; the <code>sph_types.h</code>
+ * header contains a compile-time test which prevents compilation on
+ * architectures where this property is not met.
+ *
+ * The hash function output is also converted into bytes. All currently
+ * implemented hash functions have an output width which is a multiple of
+ * eight, and this is likely to remain true for new designs.
+ *
+ * Most hash functions internally convert input data into 32-bit or 64-bit
+ * words, using either little-endian or big-endian conversion. The hash
+ * output also often consists of such words, which are encoded into output
+ * bytes with a similar endianness convention. Some hash functions have
+ * been only loosely specified on that subject; when necessary,
+ * <code>sphlib</code> has been tested against published "reference"
+ * implementations in order to use the same conventions.
+ *
+ * @subsection shortname Function short name
+ *
+ * Each implemented hash function has a "short name" which is used
+ * internally to derive the identifiers for the functions and context
+ * structures which the function uses. For instance, MD5 has the short
+ * name <code>"md5"</code>. Short names are listed in the next section,
+ * for the implemented hash functions. In subsequent sections, the
+ * short name will be assumed to be <code>"XXX"</code>: replace with the
+ * actual hash function name to get the C identifier.
+ *
+ * Note: some functions within the same family share the same core
+ * elements, such as update function or context structure. Correspondingly,
+ * some of the defined types or functions may actually be macros which
+ * transparently evaluate to another type or function name.
+ *
+ * @subsection context Context structure
+ *
+ * Each implemented hash function has its own context structure, available
+ * under the type name <code>"sph_XXX_context"</code> for the hash function
+ * with short name <code>"XXX"</code>. This structure holds all needed
+ * state for a running hash computation.
+ *
+ * The contents of these structures are meant to be opaque, and private
+ * to the implementation. However, these contents are specified in the
+ * header files so that application code which uses <code>sphlib</code>
+ * may access the size of those structures.
+ *
+ * The caller is responsible for allocating the context structure,
+ * whether by dynamic allocation (<code>malloc()</code> or equivalent),
+ * static allocation (a global permanent variable), as an automatic
+ * variable ("on the stack"), or by any other means which ensures proper
+ * structure alignment. <code>sphlib</code> code performs no dynamic
+ * allocation by itself.
+ *
+ * The context must be initialized before use, using the
+ * <code>sph_XXX_init()</code> function. This function sets the context
+ * state to proper initial values for hashing.
+ *
+ * Since all state data is contained within the context structure,
+ * <code>sphlib</code> is thread-safe and reentrant: several hash
+ * computations may be performed in parallel, provided that they do not
+ * operate on the same context. Moreover, a running computation can be
+ * cloned by copying the context (with a simple <code>memcpy()</code>):
+ * the context and its clone are then independent and may be updated
+ * with new data and/or closed without interfering with each other.
+ * Similarly, a context structure can be moved in memory at will:
+ * context structures contain no pointer, in particular no pointer to
+ * themselves.
+ *
+ * @subsection dataio Data input
+ *
+ * Hashed data is input with the <code>sph_XXX()</code> function, which
+ * takes as parameters a pointer to the context, a pointer to the data
+ * to hash, and the number of data bytes to hash. The context is updated
+ * with the new data.
+ *
+ * Data can be input in one or several calls, with arbitrary input lengths.
+ * However, it is best, performance wise, to input data by relatively big
+ * chunks (say a few kilobytes), because this allows <code>sphlib</code> to
+ * optimize things and avoid internal copying.
+ *
+ * When all data has been input, the context can be closed with
+ * <code>sph_XXX_close()</code>. The hash output is computed and written
+ * into the provided buffer. The caller must take care to provide a
+ * buffer of appropriate length; e.g., when using SHA-1, the output is
+ * a 20-byte word, therefore the output buffer must be at least 20 bytes
+ * long.
+ *
+ * For some hash functions, the <code>sph_XXX_addbits_and_close()</code>
+ * function can be used instead of <code>sph_XXX_close()</code>. This
+ * function can take a few extra <strong>bits</strong> to be added at
+ * the end of the input message. This allows hashing messages with a
+ * bit length which is not a multiple of 8. The extra bits are provided
+ * as an unsigned integer value, and a bit count. The bit count must be
+ * between 0 and 7, inclusive. The extra bits are provided as bits 7 to
+ * 0 (bits of numerical value 128, 64, 32... down to 1), in that order.
+ * For instance, to add three bits of value 1, 1 and 0, the unsigned
+ * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
+ * will be 3.
+ *
+ * The <code>SPH_SIZE_XXX</code> macro is defined for each hash function;
+ * it evaluates to the function output size, expressed in bits. For instance,
+ * <code>SPH_SIZE_sha1</code> evaluates to <code>160</code>.
+ *
+ * When closed, the context is automatically reinitialized and can be
+ * immediately used for another computation. It is not necessary to call
+ * <code>sph_XXX_init()</code> after a close. Note that
+ * <code>sph_XXX_init()</code> can still be called to "reset" a context,
+ * i.e. forget previously input data, and get back to the initial state.
+ *
+ * @subsection alignment Data alignment
+ *
+ * "Alignment" is a property of data, which is said to be "properly
+ * aligned" when its emplacement in memory is such that the data can
+ * be optimally read by full words. This depends on the type of access;
+ * basically, some hash functions will read data by 32-bit or 64-bit
+ * words. <code>sphlib</code> does not mandate such alignment for input
+ * data, but using aligned data can substantially improve performance.
+ *
+ * As a rule, it is best to input data by chunks whose length (in bytes)
+ * is a multiple of eight, and which begins at "generally aligned"
+ * addresses, such as the base address returned by a call to
+ * <code>malloc()</code>.
+ *
+ * @section functions Implemented functions
+ *
+ * We give here the list of implemented functions. They are grouped by
+ * family; to each family corresponds a specific header file. Each
+ * individual function has its associated "short name". Please refer to
+ * the documentation for that header file to get details on the hash
+ * function denomination and provenance.
+ *
+ * Note: the functions marked with a '(64)' in the list below are
+ * available only if the C compiler provides an integer type of length
+ * 64 bits or more. Such a type is mandatory in the latest C standard
+ * (ISO 9899:1999, aka "C99") and is present in several older compilers
+ * as well, so chances are that such a type is available.
+ *
+ * - HAVAL family: file <code>sph_haval.h</code>
+ *   - HAVAL-128/3 (128-bit, 3 passes): short name: <code>haval128_3</code>
+ *   - HAVAL-128/4 (128-bit, 4 passes): short name: <code>haval128_4</code>
+ *   - HAVAL-128/5 (128-bit, 5 passes): short name: <code>haval128_5</code>
+ *   - HAVAL-160/3 (160-bit, 3 passes): short name: <code>haval160_3</code>
+ *   - HAVAL-160/4 (160-bit, 4 passes): short name: <code>haval160_4</code>
+ *   - HAVAL-160/5 (160-bit, 5 passes): short name: <code>haval160_5</code>
+ *   - HAVAL-192/3 (192-bit, 3 passes): short name: <code>haval192_3</code>
+ *   - HAVAL-192/4 (192-bit, 4 passes): short name: <code>haval192_4</code>
+ *   - HAVAL-192/5 (192-bit, 5 passes): short name: <code>haval192_5</code>
+ *   - HAVAL-224/3 (224-bit, 3 passes): short name: <code>haval224_3</code>
+ *   - HAVAL-224/4 (224-bit, 4 passes): short name: <code>haval224_4</code>
+ *   - HAVAL-224/5 (224-bit, 5 passes): short name: <code>haval224_5</code>
+ *   - HAVAL-256/3 (256-bit, 3 passes): short name: <code>haval256_3</code>
+ *   - HAVAL-256/4 (256-bit, 4 passes): short name: <code>haval256_4</code>
+ *   - HAVAL-256/5 (256-bit, 5 passes): short name: <code>haval256_5</code>
+ * - MD2: file <code>sph_md2.h</code>, short name: <code>md2</code>
+ * - MD4: file <code>sph_md4.h</code>, short name: <code>md4</code>
+ * - MD5: file <code>sph_md5.h</code>, short name: <code>md5</code>
+ * - PANAMA: file <code>sph_panama.h</code>, short name: <code>panama</code>
+ * - RadioGatun family: file <code>sph_radiogatun.h</code>
+ *   - RadioGatun[32]: short name: <code>radiogatun32</code>
+ *   - RadioGatun[64]: short name: <code>radiogatun64</code> (64)
+ * - RIPEMD family: file <code>sph_ripemd.h</code>
+ *   - RIPEMD: short name: <code>ripemd</code>
+ *   - RIPEMD-128: short name: <code>ripemd128</code>
+ *   - RIPEMD-160: short name: <code>ripemd160</code>
+ * - SHA-0: file <code>sph_sha0.h</code>, short name: <code>sha0</code>
+ * - SHA-1: file <code>sph_sha1.h</code>, short name: <code>sha1</code>
+ * - SHA-2 family, 32-bit hashes: file <code>sph_sha2.h</code>
+ *   - SHA-224: short name: <code>sha224</code>
+ *   - SHA-256: short name: <code>sha256</code>
+ *   - SHA-384: short name: <code>sha384</code> (64)
+ *   - SHA-512: short name: <code>sha512</code> (64)
+ * - Tiger family: file <code>sph_tiger.h</code>
+ *   - Tiger: short name: <code>tiger</code> (64)
+ *   - Tiger2: short name: <code>tiger2</code> (64)
+ * - WHIRLPOOL family: file <code>sph_whirlpool.h</code>
+ *   - WHIRLPOOL-0: short name: <code>whirlpool0</code> (64)
+ *   - WHIRLPOOL-1: short name: <code>whirlpool1</code> (64)
+ *   - WHIRLPOOL: short name: <code>whirlpool</code> (64)
+ *
+ * The fourteen second-round SHA-3 candidates are also implemented;
+ * when applicable, the implementations follow the "final" specifications
+ * as published for the third round of the SHA-3 competition (BLAKE,
+ * Groestl, JH, Keccak and Skein have been tweaked for third round).
+ *
+ * - BLAKE family: file <code>sph_blake.h</code>
+ *   - BLAKE-224: short name: <code>blake224</code>
+ *   - BLAKE-256: short name: <code>blake256</code>
+ *   - BLAKE-384: short name: <code>blake384</code>
+ *   - BLAKE-512: short name: <code>blake512</code>
+ * - BMW (Blue Midnight Wish) family: file <code>sph_bmw.h</code>
+ *   - BMW-224: short name: <code>bmw224</code>
+ *   - BMW-256: short name: <code>bmw256</code>
+ *   - BMW-384: short name: <code>bmw384</code> (64)
+ *   - BMW-512: short name: <code>bmw512</code> (64)
+ * - CubeHash family: file <code>sph_cubehash.h</code> (specified as
+ *   CubeHash16/32 in the CubeHash specification)
+ *   - CubeHash-224: short name: <code>cubehash224</code>
+ *   - CubeHash-256: short name: <code>cubehash256</code>
+ *   - CubeHash-384: short name: <code>cubehash384</code>
+ *   - CubeHash-512: short name: <code>cubehash512</code>
+ * - ECHO family: file <code>sph_echo.h</code>
+ *   - ECHO-224: short name: <code>echo224</code>
+ *   - ECHO-256: short name: <code>echo256</code>
+ *   - ECHO-384: short name: <code>echo384</code>
+ *   - ECHO-512: short name: <code>echo512</code>
+ * - Fugue family: file <code>sph_fugue.h</code>
+ *   - Fugue-224: short name: <code>fugue224</code>
+ *   - Fugue-256: short name: <code>fugue256</code>
+ *   - Fugue-384: short name: <code>fugue384</code>
+ *   - Fugue-512: short name: <code>fugue512</code>
+ * - Groestl family: file <code>sph_groestl.h</code>
+ *   - Groestl-224: short name: <code>groestl224</code>
+ *   - Groestl-256: short name: <code>groestl256</code>
+ *   - Groestl-384: short name: <code>groestl384</code>
+ *   - Groestl-512: short name: <code>groestl512</code>
+ * - Hamsi family: file <code>sph_hamsi.h</code>
+ *   - Hamsi-224: short name: <code>hamsi224</code>
+ *   - Hamsi-256: short name: <code>hamsi256</code>
+ *   - Hamsi-384: short name: <code>hamsi384</code>
+ *   - Hamsi-512: short name: <code>hamsi512</code>
+ * - JH family: file <code>sph_jh.h</code>
+ *   - JH-224: short name: <code>jh224</code>
+ *   - JH-256: short name: <code>jh256</code>
+ *   - JH-384: short name: <code>jh384</code>
+ *   - JH-512: short name: <code>jh512</code>
+ * - Keccak family: file <code>sph_keccak.h</code>
+ *   - Keccak-224: short name: <code>keccak224</code>
+ *   - Keccak-256: short name: <code>keccak256</code>
+ *   - Keccak-384: short name: <code>keccak384</code>
+ *   - Keccak-512: short name: <code>keccak512</code>
+ * - Luffa family: file <code>sph_luffa.h</code>
+ *   - Luffa-224: short name: <code>luffa224</code>
+ *   - Luffa-256: short name: <code>luffa256</code>
+ *   - Luffa-384: short name: <code>luffa384</code>
+ *   - Luffa-512: short name: <code>luffa512</code>
+ * - Shabal family: file <code>sph_shabal.h</code>
+ *   - Shabal-192: short name: <code>shabal192</code>
+ *   - Shabal-224: short name: <code>shabal224</code>
+ *   - Shabal-256: short name: <code>shabal256</code>
+ *   - Shabal-384: short name: <code>shabal384</code>
+ *   - Shabal-512: short name: <code>shabal512</code>
+ * - SHAvite-3 family: file <code>sph_shavite.h</code>
+ *   - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
+ *     short name: <code>shavite224</code>
+ *   - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
+ *     short name: <code>shavite256</code>
+ *   - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
+ *     short name: <code>shavite384</code>
+ *   - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
+ *     short name: <code>shavite512</code>
+ * - SIMD family: file <code>sph_simd.h</code>
+ *   - SIMD-224: short name: <code>simd224</code>
+ *   - SIMD-256: short name: <code>simd256</code>
+ *   - SIMD-384: short name: <code>simd384</code>
+ *   - SIMD-512: short name: <code>simd512</code>
+ * - Skein family: file <code>sph_skein.h</code>
+ *   - Skein-224 (nominally specified as Skein-512-224): short name:
+ *     <code>skein224</code> (64)
+ *   - Skein-256 (nominally specified as Skein-512-256): short name:
+ *     <code>skein256</code> (64)
+ *   - Skein-384 (nominally specified as Skein-512-384): short name:
+ *     <code>skein384</code> (64)
+ *   - Skein-512 (nominally specified as Skein-512-512): short name:
+ *     <code>skein512</code> (64)
+ *
+ * For the second-round SHA-3 candidates, the functions are as specified
+ * for round 2, i.e. with the "tweaks" that some candidates added
+ * between round 1 and round 2. Also, some of the submitted packages for
+ * round 2 contained errors, in the specification, reference code, or
+ * both. <code>sphlib</code> implements the corrected versions.
+ */
+
+/** @hideinitializer
+ * Unsigned integer type whose length is at least 32 bits; on most
+ * architectures, it will have a width of exactly 32 bits. Unsigned C
+ * types implement arithmetics modulo a power of 2; use the
+ * <code>SPH_T32()</code> macro to ensure that the value is truncated
+ * to exactly 32 bits. Unless otherwise specified, all macros and
+ * functions which accept <code>sph_u32</code> values assume that these
+ * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
+ * where <code>sph_u32</code> is larger than that.
+ */
+typedef __arch_dependant__ sph_u32;
+
+/** @hideinitializer
+ * Signed integer type corresponding to <code>sph_u32</code>; it has
+ * width 32 bits or more.
+ */
+typedef __arch_dependant__ sph_s32;
+
+/** @hideinitializer
+ * Unsigned integer type whose length is at least 64 bits; on most
+ * architectures which feature such a type, it will have a width of
+ * exactly 64 bits. C99-compliant platforms will have this type; it
+ * is also defined when the GNU compiler (gcc) is used, and on
+ * platforms where <code>unsigned long</code> is large enough. If this
+ * type is not available, then some hash functions which depend on
+ * a 64-bit type will not be available (most notably SHA-384, SHA-512,
+ * Tiger and WHIRLPOOL).
+ */
+typedef __arch_dependant__ sph_u64;
+
+/** @hideinitializer
+ * Signed integer type corresponding to <code>sph_u64</code>; it has
+ * width 64 bits or more.
+ */
+typedef __arch_dependant__ sph_s64;
+
+/**
+ * This macro expands the token <code>x</code> into a suitable
+ * constant expression of type <code>sph_u32</code>. Depending on
+ * how this type is defined, a suffix such as <code>UL</code> may
+ * be appended to the argument.
+ *
+ * @param x   the token to expand into a suitable constant expression
+ */
+#define SPH_C32(x)
+
+/**
+ * Truncate a 32-bit value to exactly 32 bits. On most systems, this is
+ * a no-op, recognized as such by the compiler.
+ *
+ * @param x   the value to truncate (of type <code>sph_u32</code>)
+ */
+#define SPH_T32(x)
+
+/**
+ * Rotate a 32-bit value by a number of bits to the left. The rotate
+ * count must reside between 1 and 31. This macro assumes that its
+ * first argument fits in 32 bits (no extra bit allowed on machines where
+ * <code>sph_u32</code> is wider); both arguments may be evaluated
+ * several times.
+ *
+ * @param x   the value to rotate (of type <code>sph_u32</code>)
+ * @param n   the rotation count (between 1 and 31, inclusive)
+ */
+#define SPH_ROTL32(x, n)
+
+/**
+ * Rotate a 32-bit value by a number of bits to the right. The rotate
+ * count must reside between 1 and 31. This macro assumes that its
+ * first argument fits in 32 bits (no extra bit allowed on machines where
+ * <code>sph_u32</code> is wider); both arguments may be evaluated
+ * several times.
+ *
+ * @param x   the value to rotate (of type <code>sph_u32</code>)
+ * @param n   the rotation count (between 1 and 31, inclusive)
+ */
+#define SPH_ROTR32(x, n)
+
+/**
+ * This macro is defined on systems for which a 64-bit type has been
+ * detected, and is used for <code>sph_u64</code>.
+ */
+#define SPH_64
+
+/**
+ * This macro is defined on systems for which the "native" integer size
+ * is 64 bits (64-bit values fit in one register).
+ */
+#define SPH_64_TRUE
+
+/**
+ * This macro expands the token <code>x</code> into a suitable
+ * constant expression of type <code>sph_u64</code>. Depending on
+ * how this type is defined, a suffix such as <code>ULL</code> may
+ * be appended to the argument. This macro is defined only if a
+ * 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param x   the token to expand into a suitable constant expression
+ */
+#define SPH_C64(x)
+
+/**
+ * Truncate a 64-bit value to exactly 64 bits. On most systems, this is
+ * a no-op, recognized as such by the compiler. This macro is defined only
+ * if a 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param x   the value to truncate (of type <code>sph_u64</code>)
+ */
+#define SPH_T64(x)
+
+/**
+ * Rotate a 64-bit value by a number of bits to the left. The rotate
+ * count must reside between 1 and 63. This macro assumes that its
+ * first argument fits in 64 bits (no extra bit allowed on machines where
+ * <code>sph_u64</code> is wider); both arguments may be evaluated
+ * several times. This macro is defined only if a 64-bit type was detected
+ * and used for <code>sph_u64</code>.
+ *
+ * @param x   the value to rotate (of type <code>sph_u64</code>)
+ * @param n   the rotation count (between 1 and 63, inclusive)
+ */
+#define SPH_ROTL64(x, n)
+
+/**
+ * Rotate a 64-bit value by a number of bits to the right. The rotate
+ * count must reside between 1 and 63. This macro assumes that its
+ * first argument fits in 64 bits (no extra bit allowed on machines where
+ * <code>sph_u64</code> is wider); both arguments may be evaluated
+ * several times. This macro is defined only if a 64-bit type was detected
+ * and used for <code>sph_u64</code>.
+ *
+ * @param x   the value to rotate (of type <code>sph_u64</code>)
+ * @param n   the rotation count (between 1 and 63, inclusive)
+ */
+#define SPH_ROTR64(x, n)
+
+/**
+ * This macro evaluates to <code>inline</code> or an equivalent construction,
+ * if available on the compilation platform, or to nothing otherwise. This
+ * is used to declare inline functions, for which the compiler should
+ * endeavour to include the code directly in the caller. Inline functions
+ * are typically defined in header files as replacement for macros.
+ */
+#define SPH_INLINE
+
+/**
+ * This macro is defined if the platform has been detected as using
+ * little-endian convention. This implies that the <code>sph_u32</code>
+ * type (and the <code>sph_u64</code> type also, if it is defined) has
+ * an exact width (i.e. exactly 32-bit, respectively 64-bit).
+ */
+#define SPH_LITTLE_ENDIAN
+
+/**
+ * This macro is defined if the platform has been detected as using
+ * big-endian convention. This implies that the <code>sph_u32</code>
+ * type (and the <code>sph_u64</code> type also, if it is defined) has
+ * an exact width (i.e. exactly 32-bit, respectively 64-bit).
+ */
+#define SPH_BIG_ENDIAN
+
+/**
+ * This macro is defined if 32-bit words (and 64-bit words, if defined)
+ * can be read from and written to memory efficiently in little-endian
+ * convention. This is the case for little-endian platforms, and also
+ * for the big-endian platforms which have special little-endian access
+ * opcodes (e.g. Ultrasparc).
+ */
+#define SPH_LITTLE_FAST
+
+/**
+ * This macro is defined if 32-bit words (and 64-bit words, if defined)
+ * can be read from and written to memory efficiently in big-endian
+ * convention. This is the case for big-endian platforms, and also
+ * for the little-endian platforms which have special big-endian access
+ * opcodes.
+ */
+#define SPH_BIG_FAST
+
+/**
+ * On some platforms, this macro is defined to an unsigned integer type
+ * into which pointer values may be cast. The resulting value can then
+ * be tested for being a multiple of 2, 4 or 8, indicating an aligned
+ * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
+ */
+#define SPH_UPTR
+
+/**
+ * When defined, this macro indicates that unaligned memory accesses
+ * are possible with only a minor penalty, and thus should be preferred
+ * over strategies which first copy data to an aligned buffer.
+ */
+#define SPH_UNALIGNED
+
+/**
+ * Byte-swap a 32-bit word (i.e. <code>0x12345678</code> becomes
+ * <code>0x78563412</code>). This is an inline function which resorts
+ * to inline assembly on some platforms, for better performance.
+ *
+ * @param x   the 32-bit value to byte-swap
+ * @return  the byte-swapped value
+ */
+static inline sph_u32 sph_bswap32(sph_u32 x);
+
+/**
+ * Byte-swap a 64-bit word. This is an inline function which resorts
+ * to inline assembly on some platforms, for better performance. This
+ * function is defined only if a suitable 64-bit type was found for
+ * <code>sph_u64</code>
+ *
+ * @param x   the 64-bit value to byte-swap
+ * @return  the byte-swapped value
+ */
+static inline sph_u64 sph_bswap64(sph_u64 x);
+
+/**
+ * Decode a 16-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first).
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline unsigned sph_dec16le(const void *src);
+
+/**
+ * Encode a 16-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first).
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc16le(void *dst, unsigned val);
+
+/**
+ * Decode a 16-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first).
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline unsigned sph_dec16be(const void *src);
+
+/**
+ * Encode a 16-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first).
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc16be(void *dst, unsigned val);
+
+/**
+ * Decode a 32-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first).
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u32 sph_dec32le(const void *src);
+
+/**
+ * Decode a 32-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first). This function assumes that the
+ * source address is suitably aligned for a direct access, if the platform
+ * supports such things; it can thus be marginally faster than the generic
+ * <code>sph_dec32le()</code> function.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u32 sph_dec32le_aligned(const void *src);
+
+/**
+ * Encode a 32-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first).
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc32le(void *dst, sph_u32 val);
+
+/**
+ * Encode a 32-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first). This function assumes that the
+ * destination address is suitably aligned for a direct access, if the
+ * platform supports such things; it can thus be marginally faster than
+ * the generic <code>sph_enc32le()</code> function.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc32le_aligned(void *dst, sph_u32 val);
+
+/**
+ * Decode a 32-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first).
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u32 sph_dec32be(const void *src);
+
+/**
+ * Decode a 32-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first). This function assumes that the
+ * source address is suitably aligned for a direct access, if the platform
+ * supports such things; it can thus be marginally faster than the generic
+ * <code>sph_dec32be()</code> function.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u32 sph_dec32be_aligned(const void *src);
+
+/**
+ * Encode a 32-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first).
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc32be(void *dst, sph_u32 val);
+
+/**
+ * Encode a 32-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first). This function assumes that the
+ * destination address is suitably aligned for a direct access, if the
+ * platform supports such things; it can thus be marginally faster than
+ * the generic <code>sph_enc32be()</code> function.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc32be_aligned(void *dst, sph_u32 val);
+
+/**
+ * Decode a 64-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first). This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u64 sph_dec64le(const void *src);
+
+/**
+ * Decode a 64-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first). This function assumes that the
+ * source address is suitably aligned for a direct access, if the platform
+ * supports such things; it can thus be marginally faster than the generic
+ * <code>sph_dec64le()</code> function. This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u64 sph_dec64le_aligned(const void *src);
+
+/**
+ * Encode a 64-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first). This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc64le(void *dst, sph_u64 val);
+
+/**
+ * Encode a 64-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first). This function assumes that the
+ * destination address is suitably aligned for a direct access, if the
+ * platform supports such things; it can thus be marginally faster than
+ * the generic <code>sph_enc64le()</code> function. This function is defined
+ * only if a suitable 64-bit type was detected and used for
+ * <code>sph_u64</code>.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc64le_aligned(void *dst, sph_u64 val);
+
+/**
+ * Decode a 64-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first). This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u64 sph_dec64be(const void *src);
+
+/**
+ * Decode a 64-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first). This function assumes that the
+ * source address is suitably aligned for a direct access, if the platform
+ * supports such things; it can thus be marginally faster than the generic
+ * <code>sph_dec64be()</code> function. This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u64 sph_dec64be_aligned(const void *src);
+
+/**
+ * Encode a 64-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first). This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc64be(void *dst, sph_u64 val);
+
+/**
+ * Encode a 64-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first). This function assumes that the
+ * destination address is suitably aligned for a direct access, if the
+ * platform supports such things; it can thus be marginally faster than
+ * the generic <code>sph_enc64be()</code> function. This function is defined
+ * only if a suitable 64-bit type was detected and used for
+ * <code>sph_u64</code>.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc64be_aligned(void *dst, sph_u64 val);
+
+#endif
+
+/* ============== END documentation block for Doxygen ============= */
+
+#ifndef DOXYGEN_IGNORE
+
+/*
+ * We want to define the types "sph_u32" and "sph_u64" which hold
+ * unsigned values of at least, respectively, 32 and 64 bits. These
+ * tests should select appropriate types for most platforms. The
+ * macro "SPH_64" is defined if a 64-bit type is supported.
+ */
+
+#undef SPH_64
+#undef SPH_64_TRUE
+
+#if defined __STDC__ && __STDC_VERSION__ >= 199901L
+
+/*
+ * On C99 implementations, we can use <stdint.h> to get an exact 64-bit
+ * type, if any, or otherwise use a wider type (which must exist, for
+ * C99 conformance).
+ */
+
+#include <stdint.h>
+
+/*
+ * 32-bit types: use the exact-width types when <stdint.h> provides them
+ * (signalled by UINT32_MAX being defined), the "fast" variants otherwise.
+ */
+#ifdef UINT32_MAX
+typedef uint32_t sph_u32;
+typedef int32_t sph_s32;
+#else
+typedef uint_fast32_t sph_u32;
+typedef int_fast32_t sph_s32;
+#endif
+/*
+ * 64-bit types, selected the same way; skipped entirely when SPH_NO_64
+ * is set to a non-zero value.
+ */
+#if !SPH_NO_64
+#ifdef UINT64_MAX
+typedef uint64_t sph_u64;
+typedef int64_t sph_s64;
+#else
+typedef uint_fast64_t sph_u64;
+typedef int_fast64_t sph_s64;
+#endif
+#endif
+
+/* Constants are simply cast to the selected type; no suffix is pasted. */
+#define SPH_C32(x)    ((sph_u32)(x))
+#if !SPH_NO_64
+#define SPH_C64(x)    ((sph_u64)(x))
+/* Advertise that sph_u64 is available. */
+#define SPH_64  1
+#endif
+
+#else
+
+/*
+ * On non-C99 systems, we use "unsigned int" if it is wide enough,
+ * "unsigned long" otherwise. This supports all "reasonable" architectures.
+ * We have to be cautious: pre-C99 preprocessors handle constants
+ * differently in '#if' expressions. Hence the shifts to test UINT_MAX.
+ */
+
+/*
+ * UINT_MAX shifted right by 22 bits (in two steps) must still be at
+ * least 0x3FF (10 bits), i.e. "unsigned int" holds at least 32 bits.
+ */
+#if ((UINT_MAX >> 11) >> 11) >= 0x3FF
+
+typedef unsigned int sph_u32;
+typedef int sph_s32;
+
+/* The "U" suffix is pasted onto the token, so x must be a plain literal. */
+#define SPH_C32(x)    ((sph_u32)(x ## U))
+
+#else
+
+/* "unsigned int" is too narrow: fall back to "unsigned long". */
+typedef unsigned long sph_u32;
+typedef long sph_s32;
+
+#define SPH_C32(x)    ((sph_u32)(x ## UL))
+
+#endif
+
+#if !SPH_NO_64
+
+/*
+ * We want a 64-bit type. We use "unsigned long" if it is wide enough (as
+ * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
+ * "unsigned long long" otherwise, if available. We use ULLONG_MAX to
+ * test whether "unsigned long long" is available; we also know that
+ * gcc features this type, even if the libc headers do not know it.
+ */
+
+/*
+ * ULONG_MAX shifted right by 62 bits (in two steps) must still be at
+ * least 3, i.e. "unsigned long" holds at least 64 bits.
+ */
+#if ((ULONG_MAX >> 31) >> 31) >= 3
+
+typedef unsigned long sph_u64;
+typedef long sph_s64;
+
+#define SPH_C64(x)    ((sph_u64)(x ## UL))
+
+#define SPH_64  1
+
+/* Same width test on "unsigned long long"; gcc is assumed to have it. */
+#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__
+
+typedef unsigned long long sph_u64;
+typedef long long sph_s64;
+
+#define SPH_C64(x)    ((sph_u64)(x ## ULL))
+
+#define SPH_64  1
+
+#else
+
+/*
+ * No 64-bit type: sph_u64, SPH_C64 and SPH_64 are all left undefined.
+ */
+
+#endif
+
+#endif
+
+#endif
+
+/*
+ * If the "unsigned long" type has length 64 bits or more, then this is
+ * a "true" 64-bit architecture. This is also true with Visual C on
+ * amd64, even though the "long" type is limited to 32 bits.
+ */
+#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
+#define SPH_64_TRUE   1
+#endif
+
+/*
+ * Implementation note: some processors have specific opcodes to perform
+ * a rotation. Recent versions of gcc recognize the expressions below and
+ * use the relevant opcodes, when appropriate.
+ */
+
+/* Keep only the low 32 bits of x. */
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+/*
+ * Left rotation: the two shifted copies are OR-ed together and the result
+ * is truncated to 32 bits. Both x and n are evaluated more than once.
+ */
+#define SPH_ROTL32(x, n)   SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
+/* Right rotation by n, expressed as a left rotation by 32 - n. */
+#define SPH_ROTR32(x, n)   SPH_ROTL32(x, (32 - (n)))
+
+#if SPH_64
+
+/* Keep only the low 64 bits of x. */
+#define SPH_T64(x)    ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
+/*
+ * Left rotation: the two shifted copies are OR-ed together and the result
+ * is truncated to 64 bits. Both x and n are evaluated more than once.
+ */
+#define SPH_ROTL64(x, n)   SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
+/* Right rotation by n, expressed as a left rotation by 64 - n. */
+#define SPH_ROTR64(x, n)   SPH_ROTL64(x, (64 - (n)))
+
+#endif
+
+#ifndef DOXYGEN_IGNORE
+/*
+ * Define SPH_INLINE to be an "inline" qualifier, if available. We define
+ * some small macro-like functions which benefit greatly from being inlined.
+ */
+#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__
+#define SPH_INLINE inline
+#elif defined _MSC_VER
+#define SPH_INLINE __inline
+#else
+#define SPH_INLINE
+#endif
+#endif
+
+/*
+ * We define some macros which qualify the architecture. These macros
+ * may be explicitly set externally (e.g. as compiler parameters). The
+ * code below sets those macros if they are not already defined.
+ *
+ * Most macros are boolean, thus evaluate to either zero or non-zero.
+ * The SPH_UPTR macro is special, in that it evaluates to a C type,
+ * or is not defined.
+ *
+ * SPH_UPTR             if defined: unsigned type to cast pointers into
+ *
+ * SPH_UNALIGNED        non-zero if unaligned accesses are efficient
+ * SPH_LITTLE_ENDIAN    non-zero if architecture is known to be little-endian
+ * SPH_BIG_ENDIAN       non-zero if architecture is known to be big-endian
+ * SPH_LITTLE_FAST      non-zero if little-endian decoding is fast
+ * SPH_BIG_FAST         non-zero if big-endian decoding is fast
+ *
+ * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
+ * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN
+ * _must_ be non-zero in those situations. The 32-bit and 64-bit types
+ * _must_ also have an exact width.
+ *
+ * SPH_SPARCV9_GCC_32   UltraSPARC-compatible with gcc, 32-bit mode
+ * SPH_SPARCV9_GCC_64   UltraSPARC-compatible with gcc, 64-bit mode
+ * SPH_SPARCV9_GCC      UltraSPARC-compatible with gcc
+ * SPH_I386_GCC         x86-compatible (32-bit) with gcc
+ * SPH_I386_MSVC        x86-compatible (32-bit) with Microsoft Visual C
+ * SPH_AMD64_GCC        x86-compatible (64-bit) with gcc
+ * SPH_AMD64_MSVC       x86-compatible (64-bit) with Microsoft Visual C
+ * SPH_PPC32_GCC        PowerPC, 32-bit, with gcc
+ * SPH_PPC64_GCC        PowerPC, 64-bit, with gcc
+ *
+ * TODO: enhance automatic detection, for more architectures and compilers.
+ * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
+ * some very fast functions (e.g. MD4) when using unaligned input data.
+ * The CPU-specific-with-GCC macros are useful only for inline assembly,
+ * normally restrained to this header file.
+ */
+
+/*
+ * 32-bit x86, aka "i386 compatible".
+ */
+#if defined __i386__ || defined _M_IX86
+
+#define SPH_DETECT_UNALIGNED         1
+#define SPH_DETECT_LITTLE_ENDIAN     1
+#define SPH_DETECT_UPTR              sph_u32
+#ifdef __GNUC__
+#define SPH_DETECT_I386_GCC          1
+#endif
+#ifdef _MSC_VER
+#define SPH_DETECT_I386_MSVC         1
+#endif
+
+/*
+ * 64-bit x86, hereafter known as "amd64".
+ */
+#elif defined __x86_64 || defined _M_X64
+
+#define SPH_DETECT_UNALIGNED         1
+#define SPH_DETECT_LITTLE_ENDIAN     1
+#define SPH_DETECT_UPTR              sph_u64
+#ifdef __GNUC__
+#define SPH_DETECT_AMD64_GCC         1
+#endif
+#ifdef _MSC_VER
+#define SPH_DETECT_AMD64_MSVC        1
+#endif
+
+/*
+ * 64-bit Sparc architecture (implies v9).
+ */
+#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
+	|| defined __sparcv9
+
+#define SPH_DETECT_BIG_ENDIAN        1
+#define SPH_DETECT_UPTR              sph_u64
+#ifdef __GNUC__
+#define SPH_DETECT_SPARCV9_GCC_64    1
+#define SPH_DETECT_LITTLE_FAST       1
+#endif
+
+/*
+ * 32-bit Sparc.
+ */
+#elif (defined __sparc__ || defined __sparc) \
+	&& !(defined __sparcv9 || defined __arch64__)
+
+#define SPH_DETECT_BIG_ENDIAN        1
+#define SPH_DETECT_UPTR              sph_u32
+#if defined __GNUC__ && defined __sparc_v9__
+#define SPH_DETECT_SPARCV9_GCC_32    1
+#define SPH_DETECT_LITTLE_FAST       1
+#endif
+
+/*
+ * ARM, little-endian.
+ */
+#elif defined __arm__ && __ARMEL__
+
+#define SPH_DETECT_LITTLE_ENDIAN     1
+
+/*
+ * MIPS, little-endian.
+ */
+#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__
+
+#define SPH_DETECT_LITTLE_ENDIAN     1
+
+/*
+ * MIPS, big-endian.
+ */
+#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__
+
+#define SPH_DETECT_BIG_ENDIAN        1
+
+/*
+ * PowerPC.
+ */
+#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
+	|| defined _ARCH_PPC
+
+/*
+ * Note: we do not declare cross-endian access to be "fast": even if
+ * using inline assembly, implementation should still assume that
+ * keeping the decoded word in a temporary is faster than decoding
+ * it again.
+ */
+#if defined __GNUC__
+#if SPH_64_TRUE
+#define SPH_DETECT_PPC64_GCC         1
+#else
+#define SPH_DETECT_PPC32_GCC         1
+#endif
+#endif
+
+#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
+#define SPH_DETECT_BIG_ENDIAN        1
+#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+#define SPH_DETECT_LITTLE_ENDIAN     1
+#endif
+
+/*
+ * Itanium, 64-bit.
+ */
+#elif defined __ia64 || defined __ia64__ \
+	|| defined __itanium__ || defined _M_IA64
+
+#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
+#define SPH_DETECT_BIG_ENDIAN        1
+#else
+#define SPH_DETECT_LITTLE_ENDIAN     1
+#endif
+#if defined __LP64__ || defined _LP64
+#define SPH_DETECT_UPTR              sph_u64
+#else
+#define SPH_DETECT_UPTR              sph_u32
+#endif
+
+#endif
+
+/* Either SPARCv9 flavour (32-bit or 64-bit mode) sets the generic flag. */
+#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64
+#define SPH_DETECT_SPARCV9_GCC       1
+#endif
+
+/*
+ * Promote each auto-detected SPH_DETECT_xxx value to the corresponding
+ * SPH_xxx macro, unless SPH_xxx is already defined (an existing
+ * definition always takes precedence over detection).
+ */
+#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED
+#define SPH_UNALIGNED         SPH_DETECT_UNALIGNED
+#endif
+#if defined SPH_DETECT_UPTR && !defined SPH_UPTR
+#define SPH_UPTR              SPH_DETECT_UPTR
+#endif
+#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN
+#define SPH_LITTLE_ENDIAN     SPH_DETECT_LITTLE_ENDIAN
+#endif
+#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN
+#define SPH_BIG_ENDIAN        SPH_DETECT_BIG_ENDIAN
+#endif
+#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST
+#define SPH_LITTLE_FAST       SPH_DETECT_LITTLE_FAST
+#endif
+#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST
+#define SPH_BIG_FAST    SPH_DETECT_BIG_FAST
+#endif
+#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32
+#define SPH_SPARCV9_GCC_32    SPH_DETECT_SPARCV9_GCC_32
+#endif
+#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64
+#define SPH_SPARCV9_GCC_64    SPH_DETECT_SPARCV9_GCC_64
+#endif
+#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC
+#define SPH_SPARCV9_GCC       SPH_DETECT_SPARCV9_GCC
+#endif
+#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC
+#define SPH_I386_GCC          SPH_DETECT_I386_GCC
+#endif
+#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC
+#define SPH_I386_MSVC         SPH_DETECT_I386_MSVC
+#endif
+#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC
+#define SPH_AMD64_GCC         SPH_DETECT_AMD64_GCC
+#endif
+#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC
+#define SPH_AMD64_MSVC        SPH_DETECT_AMD64_MSVC
+#endif
+#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC
+#define SPH_PPC32_GCC         SPH_DETECT_PPC32_GCC
+#endif
+#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC
+#define SPH_PPC64_GCC         SPH_DETECT_PPC64_GCC
+#endif
+
+/*
+ * Access in the native byte order is considered fast by default, unless
+ * the corresponding SPH_xxx_FAST macro has already been defined.
+ */
+#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST
+#define SPH_LITTLE_FAST              1
+#endif
+#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST
+#define SPH_BIG_FAST                 1
+#endif
+
+/*
+ * The pointer-cast code paths (enabled by SPH_UPTR) need a known byte
+ * order; refuse to compile otherwise.
+ */
+#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN)
+#error SPH_UPTR defined, but endianness is not known.
+#endif
+
+#if SPH_I386_GCC && !SPH_NO_ASM
+
+/*
+ * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
+ * values.
+ */
+
+/*
+ * Byte-swap a 32-bit word with the "bswapl" instruction. The "0" input
+ * constraint ties the input to the output operand, so x is swapped in
+ * place in a single register.
+ */
+static SPH_INLINE sph_u32
+sph_bswap32(sph_u32 x)
+{
+	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
+	return x;
+}
+
+#if SPH_64
+
+/*
+ * Byte-swap a 64-bit word using two 32-bit swaps: each half is swapped
+ * on its own, then the two halves trade places.
+ */
+static SPH_INLINE sph_u64
+sph_bswap64(sph_u64 x)
+{
+	sph_u32 low_swapped = sph_bswap32((sph_u32)x);
+	sph_u32 high_swapped = sph_bswap32((sph_u32)(x >> 32));
+
+	return ((sph_u64)low_swapped << 32) | (sph_u64)high_swapped;
+}
+
+#endif
+
+#elif SPH_AMD64_GCC && !SPH_NO_ASM
+
+/*
+ * On x86 64-bit, with gcc, we use the bswapl and bswapq opcodes to
+ * byte-swap 32-bit and 64-bit values, respectively.
+ */
+
+/*
+ * Byte-swap a 32-bit word with the "bswapl" instruction. The "0" input
+ * constraint ties the input to the output operand, so x is swapped in
+ * place in a single register.
+ */
+static SPH_INLINE sph_u32
+sph_bswap32(sph_u32 x)
+{
+	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
+	return x;
+}
+
+#if SPH_64
+
+/*
+ * Byte-swap a 64-bit word with the "bswapq" instruction; as for the
+ * 32-bit version, input and output share one register ("0" constraint).
+ */
+static SPH_INLINE sph_u64
+sph_bswap64(sph_u64 x)
+{
+	__asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
+	return x;
+}
+
+#endif
+
+/*
+ * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
+ * to generate proper opcodes for endianness swapping with the pure C
+ * implementation below.
+ *
+
+#elif SPH_I386_MSVC && !SPH_NO_ASM
+
+static __inline sph_u32 __declspec(naked) __fastcall
+sph_bswap32(sph_u32 x)
+{
+	__asm {
+		bswap  ecx
+		mov    eax,ecx
+		ret
+	}
+}
+
+#if SPH_64
+
+static SPH_INLINE sph_u64
+sph_bswap64(sph_u64 x)
+{
+	return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
+		| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
+}
+
+#endif
+
+ *
+ * [end of disabled code]
+ */
+
+#else
+
+/*
+ * Portable 32-bit byte swap: exchange the two 16-bit halves first, then
+ * exchange the two bytes inside each half.
+ */
+static SPH_INLINE sph_u32
+sph_bswap32(sph_u32 x)
+{
+	sph_u32 halves = SPH_T32((x << 16) | (x >> 16));
+	sph_u32 from_high = (halves & SPH_C32(0xFF00FF00)) >> 8;
+	sph_u32 from_low = (halves & SPH_C32(0x00FF00FF)) << 8;
+
+	return from_high | from_low;
+}
+
+#if SPH_64
+
+/**
+ * Reverse the order of the eight bytes of a 64-bit value.
+ *
+ * @param x   the input value
+ * @return  the byte-swapped value
+ */
+static SPH_INLINE sph_u64
+sph_bswap64(sph_u64 x)
+{
+	/* exchange the two 32-bit halves */
+	sph_u64 v = SPH_T64((x << 32) | (x >> 32));
+
+	/* exchange the 16-bit units inside each half */
+	v = ((v & SPH_C64(0xFFFF0000FFFF0000)) >> 16)
+		| ((v & SPH_C64(0x0000FFFF0000FFFF)) << 16);
+	/* exchange the bytes inside each 16-bit unit */
+	return ((v & SPH_C64(0xFF00FF00FF00FF00)) >> 8)
+		| ((v & SPH_C64(0x00FF00FF00FF00FF)) << 8);
+}
+
+#endif
+
+#endif
+
+#if SPH_SPARCV9_GCC && !SPH_NO_ASM
+
+/*
+ * On UltraSPARC systems, native ordering is big-endian, but it is
+ * possible to perform little-endian read accesses by specifying the
+ * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use
+ * the opcode "lda [%reg]0x88,%dst", where %reg is the register which
+ * contains the source address and %dst is the destination register,
+ * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register
+ * to get the address space name. The latter format is better since it
+ * combines an addition and the actual access in a single opcode; but
+ * it requires the setting (and subsequent resetting) of %asi, which is
+ * slow. Some operations (i.e. MD5 compression function) combine many
+ * successive little-endian read accesses, which may share the same
+ * %asi setting. The macros below contain the appropriate inline
+ * assembly.
+ */
+
+/*
+ * Declares a local variable "sph_sparcv9_asi", saves the current %asi
+ * register into it and then sets %asi to 0x88. Because it expands to a
+ * declaration, it must appear where a declaration is allowed.
+ */
+#define SPH_SPARCV9_SET_ASI   \
+	sph_u32 sph_sparcv9_asi; \
+	__asm__ __volatile__ ( \
+		"rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi));
+
+/*
+ * Writes the value saved in "sph_sparcv9_asi" back into %asi; that
+ * variable must be in scope at the point of use.
+ */
+#define SPH_SPARCV9_RESET_ASI  \
+	__asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi));
+
+/*
+ * Loads the 32-bit word at byte offset idx*4 from "base" through the
+ * current %asi. "idx" is stringified into the instruction text, so it
+ * has to be a literal the assembler accepts. This is a gcc statement
+ * expression whose value is the loaded word.
+ */
+#define SPH_SPARCV9_DEC32LE(base, idx)   ({ \
+		sph_u32 sph_sparcv9_tmp; \
+		__asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
+			: "=r" (sph_sparcv9_tmp) : "r" (base)); \
+		sph_sparcv9_tmp; \
+	})
+
+#endif
+
+/* Store the low 16 bits of val at dst, most significant byte first. */
+static SPH_INLINE void
+sph_enc16be(void *dst, unsigned val)
+{
+	unsigned char *out = (unsigned char *)dst;
+
+	out[0] = (unsigned char)(val >> 8);
+	out[1] = (unsigned char)val;
+}
+
+/* Read two bytes from src and combine them, most significant first. */
+static SPH_INLINE unsigned
+sph_dec16be(const void *src)
+{
+	const unsigned char *in = (const unsigned char *)src;
+	unsigned high = (unsigned)in[0];
+	unsigned low = (unsigned)in[1];
+
+	return (high << 8) | low;
+}
+
+/* Store the low 16 bits of val at dst, least significant byte first. */
+static SPH_INLINE void
+sph_enc16le(void *dst, unsigned val)
+{
+	unsigned char *out = (unsigned char *)dst;
+
+	out[0] = (unsigned char)val;
+	out[1] = (unsigned char)(val >> 8);
+}
+
+/* Read two bytes from src and combine them, least significant first. */
+static SPH_INLINE unsigned
+sph_dec16le(const void *src)
+{
+	const unsigned char *in = (const unsigned char *)src;
+	unsigned low = (unsigned)in[0];
+	unsigned high = (unsigned)in[1];
+
+	return low | (high << 8);
+}
+
+/**
+ * Store a 32-bit value into the provided buffer, most significant byte
+ * first (big endian convention).
+ *
+ * @param dst   the destination buffer
+ * @param val   the 32-bit value to encode
+ */
+static SPH_INLINE void
+sph_enc32be(void *dst, sph_u32 val)
+{
+#if !defined SPH_UPTR
+	/* Portable path: emit the four bytes one at a time. */
+	unsigned char *out = (unsigned char *)dst;
+
+	out[0] = (unsigned char)(val >> 24);
+	out[1] = (unsigned char)(val >> 16);
+	out[2] = (unsigned char)(val >> 8);
+	out[3] = (unsigned char)val;
+#elif SPH_UNALIGNED
+	/* Unaligned stores are acceptable: always write a whole word. */
+#if SPH_LITTLE_ENDIAN
+	*(sph_u32 *)dst = sph_bswap32(val);
+#else
+	*(sph_u32 *)dst = val;
+#endif
+#else
+	if (((SPH_UPTR)dst & 3) != 0) {
+		/* Destination is not 4-byte aligned: use byte stores. */
+		unsigned char *out = (unsigned char *)dst;
+
+		out[0] = (unsigned char)(val >> 24);
+		out[1] = (unsigned char)(val >> 16);
+		out[2] = (unsigned char)(val >> 8);
+		out[3] = (unsigned char)val;
+	} else {
+#if SPH_LITTLE_ENDIAN
+		*(sph_u32 *)dst = sph_bswap32(val);
+#else
+		*(sph_u32 *)dst = val;
+#endif
+	}
+#endif
+}
+
+/**
+ * Store a 32-bit value into the provided buffer, most significant byte
+ * first (big endian convention). The caller guarantees that the
+ * destination is properly aligned.
+ *
+ * @param dst   the destination buffer (32-bit aligned)
+ * @param val   the value to encode
+ */
+static SPH_INLINE void
+sph_enc32be_aligned(void *dst, sph_u32 val)
+{
+#if SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN
+	/* Known byte order: a single word store, swapped if needed. */
+	sph_u32 *word = (sph_u32 *)dst;
+
+#if SPH_LITTLE_ENDIAN
+	*word = sph_bswap32(val);
+#else
+	*word = val;
+#endif
+#else
+	/* Unknown byte order: emit the four bytes one at a time. */
+	unsigned char *out = (unsigned char *)dst;
+
+	out[0] = (unsigned char)(val >> 24);
+	out[1] = (unsigned char)(val >> 16);
+	out[2] = (unsigned char)(val >> 8);
+	out[3] = (unsigned char)val;
+#endif
+}
+
+/**
+ * Read a 32-bit value from the provided buffer, most significant byte
+ * first (big endian convention).
+ *
+ * @param src   the source buffer
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u32
+sph_dec32be(const void *src)
+{
+#if !defined SPH_UPTR
+	/* Portable path: assemble the value from its four bytes. */
+	const unsigned char *in = (const unsigned char *)src;
+	sph_u32 b0 = (sph_u32)in[0];
+	sph_u32 b1 = (sph_u32)in[1];
+	sph_u32 b2 = (sph_u32)in[2];
+	sph_u32 b3 = (sph_u32)in[3];
+
+	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+#elif SPH_UNALIGNED
+	/* Unaligned loads are acceptable: always read a whole word. */
+	sph_u32 word = *(const sph_u32 *)src;
+
+#if SPH_LITTLE_ENDIAN
+	word = sph_bswap32(word);
+#endif
+	return word;
+#else
+	if (((SPH_UPTR)src & 3) != 0) {
+		/* Source is not 4-byte aligned: use byte loads. */
+		const unsigned char *in = (const unsigned char *)src;
+		sph_u32 b0 = (sph_u32)in[0];
+		sph_u32 b1 = (sph_u32)in[1];
+		sph_u32 b2 = (sph_u32)in[2];
+		sph_u32 b3 = (sph_u32)in[3];
+
+		return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+	} else {
+		sph_u32 word = *(const sph_u32 *)src;
+
+#if SPH_LITTLE_ENDIAN
+		word = sph_bswap32(word);
+#endif
+		return word;
+	}
+#endif
+}
+
+/**
+ * Read a 32-bit value from the provided buffer, most significant byte
+ * first (big endian convention). The caller guarantees that the source
+ * is properly aligned.
+ *
+ * @param src   the source buffer (32-bit aligned)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u32
+sph_dec32be_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN
+	/* Known byte order: a single word load, swapped if needed. */
+	sph_u32 word = *(const sph_u32 *)src;
+
+#if SPH_LITTLE_ENDIAN
+	word = sph_bswap32(word);
+#endif
+	return word;
+#else
+	/* Unknown byte order: assemble the value from its four bytes. */
+	const unsigned char *in = (const unsigned char *)src;
+	sph_u32 b0 = (sph_u32)in[0];
+	sph_u32 b1 = (sph_u32)in[1];
+	sph_u32 b2 = (sph_u32)in[2];
+	sph_u32 b3 = (sph_u32)in[3];
+
+	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+#endif
+}
+
+/**
+ * Store a 32-bit value into the provided buffer, least significant byte
+ * first (little endian convention).
+ *
+ * @param dst   the destination buffer
+ * @param val   the 32-bit value to encode
+ */
+static SPH_INLINE void
+sph_enc32le(void *dst, sph_u32 val)
+{
+#if !defined SPH_UPTR
+	/* Portable path: emit the four bytes one at a time. */
+	unsigned char *out = (unsigned char *)dst;
+
+	out[0] = (unsigned char)val;
+	out[1] = (unsigned char)(val >> 8);
+	out[2] = (unsigned char)(val >> 16);
+	out[3] = (unsigned char)(val >> 24);
+#elif SPH_UNALIGNED
+	/* Unaligned stores are acceptable: always write a whole word. */
+#if SPH_BIG_ENDIAN
+	*(sph_u32 *)dst = sph_bswap32(val);
+#else
+	*(sph_u32 *)dst = val;
+#endif
+#else
+	if (((SPH_UPTR)dst & 3) != 0) {
+		/* Destination is not 4-byte aligned: use byte stores. */
+		unsigned char *out = (unsigned char *)dst;
+
+		out[0] = (unsigned char)val;
+		out[1] = (unsigned char)(val >> 8);
+		out[2] = (unsigned char)(val >> 16);
+		out[3] = (unsigned char)(val >> 24);
+	} else {
+#if SPH_BIG_ENDIAN
+		*(sph_u32 *)dst = sph_bswap32(val);
+#else
+		*(sph_u32 *)dst = val;
+#endif
+	}
+#endif
+}
+
+/**
+ * Encode a 32-bit value into the provided buffer (little endian convention).
+ * The destination buffer must be properly aligned.
+ *
+ * @param dst   the destination buffer (32-bit aligned)
+ * @param val   the value to encode
+ */
+static SPH_INLINE void
+sph_enc32le_aligned(void *dst, sph_u32 val)
+{
+#if SPH_LITTLE_ENDIAN /* word store, no swap */
+	*(sph_u32 *)dst = val;
+#elif SPH_BIG_ENDIAN /* byte swap, then word store */
+	*(sph_u32 *)dst = sph_bswap32(val);
+#else /* neither endianness macro set: portable byte-by-byte form */
+	((unsigned char *)dst)[0] = val; /* lowest byte first */
+	((unsigned char *)dst)[1] = (val >> 8);
+	((unsigned char *)dst)[2] = (val >> 16);
+	((unsigned char *)dst)[3] = (val >> 24);
+#endif /* SPH_LITTLE_ENDIAN / SPH_BIG_ENDIAN */
+}
+
+/**
+ * Decode a 32-bit value from the provided buffer (little endian convention).
+ *
+ * @param src   the source buffer (need not be aligned; four bytes are read)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u32
+sph_dec32le(const void *src)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED /* word read with no alignment test */
+#if SPH_BIG_ENDIAN
+	return sph_bswap32(*(const sph_u32 *)src);
+#else /* !SPH_BIG_ENDIAN */
+	return *(const sph_u32 *)src;
+#endif /* SPH_BIG_ENDIAN */
+#else /* !SPH_UNALIGNED: word read only when src is 4-byte aligned */
+	if (((SPH_UPTR)src & 3) == 0) { /* low two address bits clear */
+#if SPH_BIG_ENDIAN
+#if SPH_SPARCV9_GCC && !SPH_NO_ASM /* asm load; presumably little-endian via ASI 0x88 -- TODO confirm */
+		sph_u32 tmp;
+
+		/*
+		 * "__volatile__" is needed here because without it,
+		 * gcc-3.4.3 miscompiles the code and performs the
+		 * access before the test on the address, thus triggering
+		 * a bus error...
+		 */
+		__asm__ __volatile__ (
+			"lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+		return tmp;
+/*
+ * On PowerPC, this turns out not to be worth the effort: the inline
+ * assembly makes GCC optimizer uncomfortable, which tends to nullify
+ * the decoding gains.
+ *
+ * For most hash functions, using this inline assembly trick changes
+ * hashing speed by less than 5% and often _reduces_ it. The biggest
+ * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
+ * less than 10%. The speed gain on CubeHash is probably due to the
+ * chronic shortage of registers that CubeHash endures; for the other
+ * functions, the generic code appears to be efficient enough already.
+ *
+#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
+		sph_u32 tmp;
+
+		__asm__ __volatile__ (
+			"lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+		return tmp;
+ */
+#else /* no asm path: word read, then byte swap */
+		return sph_bswap32(*(const sph_u32 *)src);
+#endif /* SPH_SPARCV9_GCC && !SPH_NO_ASM */
+#else /* !SPH_BIG_ENDIAN */
+		return *(const sph_u32 *)src;
+#endif /* SPH_BIG_ENDIAN */
+	} else { /* misaligned: assemble byte by byte, src[0] is the low byte */
+		return (sph_u32)(((const unsigned char *)src)[0])
+			| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
+			| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
+			| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
+	}
+#endif /* SPH_UNALIGNED */
+#else /* SPH_UPTR not defined: always assemble byte by byte */
+	return (sph_u32)(((const unsigned char *)src)[0])
+		| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
+		| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
+		| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
+#endif /* defined SPH_UPTR */
+}
+
+/**
+ * Decode a 32-bit value from the provided buffer (little endian convention).
+ * The source buffer must be properly aligned.
+ *
+ * @param src   the source buffer (32-bit aligned)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u32
+sph_dec32le_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN /* word read, no swap */
+	return *(const sph_u32 *)src;
+#elif SPH_BIG_ENDIAN
+#if SPH_SPARCV9_GCC && !SPH_NO_ASM /* asm load; presumably little-endian via ASI 0x88 -- TODO confirm */
+	sph_u32 tmp;
+
+	__asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+	return tmp;
+/*
+ * Not worth it generally.
+ *
+#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
+	sph_u32 tmp;
+
+	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+	return tmp;
+ */
+#else /* no asm path: word read, then byte swap */
+	return sph_bswap32(*(const sph_u32 *)src);
+#endif /* SPH_SPARCV9_GCC && !SPH_NO_ASM */
+#else /* neither endianness macro set: portable byte-by-byte form */
+	return (sph_u32)(((const unsigned char *)src)[0])
+		| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
+		| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
+		| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
+#endif /* SPH_LITTLE_ENDIAN / SPH_BIG_ENDIAN */
+}
+
+#if SPH_64
+
+/**
+ * Encode a 64-bit value into the provided buffer (big endian convention).
+ *
+ * @param dst   the destination buffer (need not be aligned; eight bytes are written)
+ * @param val   the 64-bit value to encode
+ */
+static SPH_INLINE void
+sph_enc64be(void *dst, sph_u64 val)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED /* word store with no alignment test */
+#if SPH_LITTLE_ENDIAN
+	val = sph_bswap64(val);
+#endif /* SPH_LITTLE_ENDIAN */
+	*(sph_u64 *)dst = val;
+#else /* !SPH_UNALIGNED: word store only when dst is 8-byte aligned */
+	if (((SPH_UPTR)dst & 7) == 0) { /* low three address bits clear */
+#if SPH_LITTLE_ENDIAN
+		val = sph_bswap64(val);
+#endif /* SPH_LITTLE_ENDIAN */
+		*(sph_u64 *)dst = val;
+	} else { /* misaligned: store byte by byte */
+		((unsigned char *)dst)[0] = (val >> 56); /* highest byte first */
+		((unsigned char *)dst)[1] = (val >> 48);
+		((unsigned char *)dst)[2] = (val >> 40);
+		((unsigned char *)dst)[3] = (val >> 32);
+		((unsigned char *)dst)[4] = (val >> 24);
+		((unsigned char *)dst)[5] = (val >> 16);
+		((unsigned char *)dst)[6] = (val >> 8);
+		((unsigned char *)dst)[7] = val;
+	}
+#endif /* SPH_UNALIGNED */
+#else /* SPH_UPTR not defined: always store byte by byte */
+	((unsigned char *)dst)[0] = (val >> 56); /* highest byte first */
+	((unsigned char *)dst)[1] = (val >> 48);
+	((unsigned char *)dst)[2] = (val >> 40);
+	((unsigned char *)dst)[3] = (val >> 32);
+	((unsigned char *)dst)[4] = (val >> 24);
+	((unsigned char *)dst)[5] = (val >> 16);
+	((unsigned char *)dst)[6] = (val >> 8);
+	((unsigned char *)dst)[7] = val;
+#endif /* defined SPH_UPTR */
+}
+
+/**
+ * Encode a 64-bit value into the provided buffer (big endian convention).
+ * The destination buffer must be properly aligned.
+ *
+ * @param dst   the destination buffer (64-bit aligned)
+ * @param val   the value to encode
+ */
+static SPH_INLINE void
+sph_enc64be_aligned(void *dst, sph_u64 val)
+{
+#if SPH_LITTLE_ENDIAN /* byte swap, then word store */
+	*(sph_u64 *)dst = sph_bswap64(val);
+#elif SPH_BIG_ENDIAN /* word store, no swap */
+	*(sph_u64 *)dst = val;
+#else /* neither endianness macro set: portable byte-by-byte form */
+	((unsigned char *)dst)[0] = (val >> 56); /* highest byte first */
+	((unsigned char *)dst)[1] = (val >> 48);
+	((unsigned char *)dst)[2] = (val >> 40);
+	((unsigned char *)dst)[3] = (val >> 32);
+	((unsigned char *)dst)[4] = (val >> 24);
+	((unsigned char *)dst)[5] = (val >> 16);
+	((unsigned char *)dst)[6] = (val >> 8);
+	((unsigned char *)dst)[7] = val;
+#endif /* SPH_LITTLE_ENDIAN / SPH_BIG_ENDIAN */
+}
+
+/**
+ * Decode a 64-bit value from the provided buffer (big endian convention).
+ *
+ * @param src   the source buffer (need not be aligned; eight bytes are read)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u64
+sph_dec64be(const void *src)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED /* word read with no alignment test */
+#if SPH_LITTLE_ENDIAN
+	return sph_bswap64(*(const sph_u64 *)src);
+#else /* !SPH_LITTLE_ENDIAN */
+	return *(const sph_u64 *)src;
+#endif /* SPH_LITTLE_ENDIAN */
+#else /* !SPH_UNALIGNED: word read only when src is 8-byte aligned */
+	if (((SPH_UPTR)src & 7) == 0) { /* low three address bits clear */
+#if SPH_LITTLE_ENDIAN
+		return sph_bswap64(*(const sph_u64 *)src);
+#else /* !SPH_LITTLE_ENDIAN */
+		return *(const sph_u64 *)src;
+#endif /* SPH_LITTLE_ENDIAN */
+	} else { /* misaligned: assemble byte by byte, src[0] is the top byte */
+		return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
+			| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
+			| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
+			| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
+			| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
+			| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
+			| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
+			| (sph_u64)(((const unsigned char *)src)[7]);
+	}
+#endif /* SPH_UNALIGNED */
+#else /* SPH_UPTR not defined: always assemble byte by byte */
+	return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
+		| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
+		| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
+		| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
+		| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
+		| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
+		| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
+		| (sph_u64)(((const unsigned char *)src)[7]);
+#endif /* defined SPH_UPTR */
+}
+
+/**
+ * Decode a 64-bit value from the provided buffer (big endian convention).
+ * The source buffer must be properly aligned.
+ *
+ * @param src   the source buffer (64-bit aligned)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u64
+sph_dec64be_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN /* word read, then byte swap */
+	return sph_bswap64(*(const sph_u64 *)src);
+#elif SPH_BIG_ENDIAN /* word read, no swap */
+	return *(const sph_u64 *)src;
+#else /* neither endianness macro set: portable byte-by-byte form */
+	return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
+		| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
+		| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
+		| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
+		| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
+		| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
+		| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
+		| (sph_u64)(((const unsigned char *)src)[7]);
+#endif /* SPH_LITTLE_ENDIAN / SPH_BIG_ENDIAN */
+}
+
+/**
+ * Encode a 64-bit value into the provided buffer (little endian convention).
+ *
+ * @param dst   the destination buffer (need not be aligned; eight bytes are written)
+ * @param val   the 64-bit value to encode
+ */
+static SPH_INLINE void
+sph_enc64le(void *dst, sph_u64 val)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED /* word store with no alignment test */
+#if SPH_BIG_ENDIAN
+	val = sph_bswap64(val);
+#endif /* SPH_BIG_ENDIAN */
+	*(sph_u64 *)dst = val;
+#else /* !SPH_UNALIGNED: word store only when dst is 8-byte aligned */
+	if (((SPH_UPTR)dst & 7) == 0) { /* low three address bits clear */
+#if SPH_BIG_ENDIAN
+		val = sph_bswap64(val);
+#endif /* SPH_BIG_ENDIAN */
+		*(sph_u64 *)dst = val;
+	} else { /* misaligned: store byte by byte */
+		((unsigned char *)dst)[0] = val; /* lowest byte first */
+		((unsigned char *)dst)[1] = (val >> 8);
+		((unsigned char *)dst)[2] = (val >> 16);
+		((unsigned char *)dst)[3] = (val >> 24);
+		((unsigned char *)dst)[4] = (val >> 32);
+		((unsigned char *)dst)[5] = (val >> 40);
+		((unsigned char *)dst)[6] = (val >> 48);
+		((unsigned char *)dst)[7] = (val >> 56);
+	}
+#endif /* SPH_UNALIGNED */
+#else /* SPH_UPTR not defined: always store byte by byte */
+	((unsigned char *)dst)[0] = val; /* lowest byte first */
+	((unsigned char *)dst)[1] = (val >> 8);
+	((unsigned char *)dst)[2] = (val >> 16);
+	((unsigned char *)dst)[3] = (val >> 24);
+	((unsigned char *)dst)[4] = (val >> 32);
+	((unsigned char *)dst)[5] = (val >> 40);
+	((unsigned char *)dst)[6] = (val >> 48);
+	((unsigned char *)dst)[7] = (val >> 56);
+#endif /* defined SPH_UPTR */
+}
+
+/**
+ * Encode a 64-bit value into the provided buffer (little endian convention).
+ * The destination buffer must be properly aligned.
+ *
+ * @param dst   the destination buffer (64-bit aligned)
+ * @param val   the value to encode
+ */
+static SPH_INLINE void
+sph_enc64le_aligned(void *dst, sph_u64 val)
+{
+#if SPH_LITTLE_ENDIAN /* word store, no swap */
+	*(sph_u64 *)dst = val;
+#elif SPH_BIG_ENDIAN /* byte swap, then word store */
+	*(sph_u64 *)dst = sph_bswap64(val);
+#else /* neither endianness macro set: portable byte-by-byte form */
+	((unsigned char *)dst)[0] = val; /* lowest byte first */
+	((unsigned char *)dst)[1] = (val >> 8);
+	((unsigned char *)dst)[2] = (val >> 16);
+	((unsigned char *)dst)[3] = (val >> 24);
+	((unsigned char *)dst)[4] = (val >> 32);
+	((unsigned char *)dst)[5] = (val >> 40);
+	((unsigned char *)dst)[6] = (val >> 48);
+	((unsigned char *)dst)[7] = (val >> 56);
+#endif /* SPH_LITTLE_ENDIAN / SPH_BIG_ENDIAN */
+}
+
+/**
+ * Decode a 64-bit value from the provided buffer (little endian convention).
+ *
+ * @param src   the source buffer (need not be aligned; eight bytes are read)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u64
+sph_dec64le(const void *src)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED /* word read with no alignment test */
+#if SPH_BIG_ENDIAN
+	return sph_bswap64(*(const sph_u64 *)src);
+#else /* !SPH_BIG_ENDIAN */
+	return *(const sph_u64 *)src;
+#endif /* SPH_BIG_ENDIAN */
+#else /* !SPH_UNALIGNED: word read only when src is 8-byte aligned */
+	if (((SPH_UPTR)src & 7) == 0) { /* low three address bits clear */
+#if SPH_BIG_ENDIAN
+#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM /* asm load; presumably little-endian via ASI 0x88 -- TODO confirm */
+		sph_u64 tmp;
+
+		__asm__ __volatile__ (
+			"ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+		return tmp;
+/*
+ * Not worth it generally.
+ *
+#elif SPH_PPC32_GCC && !SPH_NO_ASM
+		return (sph_u64)sph_dec32le_aligned(src)
+			| ((sph_u64)sph_dec32le_aligned(
+				(const char *)src + 4) << 32);
+#elif SPH_PPC64_GCC && !SPH_NO_ASM
+		sph_u64 tmp;
+
+		__asm__ __volatile__ (
+			"ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+		return tmp;
+ */
+#else /* no asm path: word read, then byte swap */
+		return sph_bswap64(*(const sph_u64 *)src);
+#endif /* SPH_SPARCV9_GCC_64 && !SPH_NO_ASM */
+#else /* !SPH_BIG_ENDIAN */
+		return *(const sph_u64 *)src;
+#endif /* SPH_BIG_ENDIAN */
+	} else { /* misaligned: assemble byte by byte, src[0] is the low byte */
+		return (sph_u64)(((const unsigned char *)src)[0])
+			| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
+			| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
+			| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
+			| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
+			| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
+			| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
+			| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
+	}
+#endif /* SPH_UNALIGNED */
+#else /* SPH_UPTR not defined: always assemble byte by byte */
+	return (sph_u64)(((const unsigned char *)src)[0])
+		| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
+		| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
+		| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
+		| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
+		| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
+		| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
+		| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
+#endif /* defined SPH_UPTR */
+}
+
+/**
+ * Decode a 64-bit value from the provided buffer (little endian convention).
+ * The source buffer must be properly aligned.
+ *
+ * @param src   the source buffer (64-bit aligned)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u64
+sph_dec64le_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN /* word read, no swap */
+	return *(const sph_u64 *)src;
+#elif SPH_BIG_ENDIAN
+#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM /* asm load; presumably little-endian via ASI 0x88 -- TODO confirm */
+	sph_u64 tmp;
+
+	__asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+	return tmp;
+/*
+ * Not worth it generally.
+ *
+#elif SPH_PPC32_GCC && !SPH_NO_ASM
+	return (sph_u64)sph_dec32le_aligned(src)
+		| ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32);
+#elif SPH_PPC64_GCC && !SPH_NO_ASM
+	sph_u64 tmp;
+
+	__asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+	return tmp;
+ */
+#else /* no asm path: word read, then byte swap */
+	return sph_bswap64(*(const sph_u64 *)src);
+#endif /* SPH_SPARCV9_GCC_64 && !SPH_NO_ASM */
+#else /* neither endianness macro set: portable byte-by-byte form */
+	return (sph_u64)(((const unsigned char *)src)[0])
+		| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
+		| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
+		| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
+		| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
+		| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
+		| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
+		| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
+#endif /* SPH_LITTLE_ENDIAN / SPH_BIG_ENDIAN */
+}
+
+#endif
+
+#endif /* Doxygen excluded block */
+
+#endif
diff --git a/src/hash.h b/src/hash.h
index eacb8f04fef37..563f75c98df8a 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -15,6 +15,16 @@
 
 #include <vector>
 
+enum class HashAlgorithm: unsigned int { // selects the hasher used by CBlockHeader::GetHash
+    SHA256,    // hashed with CSHA256
+    SHA256d,   // hashed with CHash256
+    RIPEMD160, // hashed with CRIPEMD160
+    HASH160,   // hashed with CHash160
+    KECCAK,    // hashed with sph_keccak256
+
+    NUM_HASH_ALGOS, // number of algorithms above, not itself an algorithm; keep last
+};
+
 typedef uint256 ChainCode;
 
 /** A hasher class for Bitcoin's 256-bit hash (double SHA-256). */
diff --git a/src/miner.cpp b/src/miner.cpp
index a12dcec2ce7e3..72e85998668ef 100644
--- a/src/miner.cpp
+++ b/src/miner.cpp
@@ -60,7 +60,7 @@ class ScoreCompare
 int64_t UpdateTime(CBlockHeader* pblock, const Consensus::Params& consensusParams, const CBlockIndex* pindexPrev)
 {
     int64_t nOldTime = pblock->nTime;
-    int64_t nNewTime = std::max(pindexPrev->GetMedianTimePast()+1, GetAdjustedTime());
+    int64_t nNewTime = std::max(pindexPrev->GetEarliestNextBlockTime(consensusParams), GetAdjustedTime());
 
     if (nOldTime < nNewTime)
         pblock->nTime = nNewTime;
diff --git a/src/pow.cpp b/src/pow.cpp
index e57fd866f8a62..36f740537d7b6 100644
--- a/src/pow.cpp
+++ b/src/pow.cpp
@@ -18,6 +18,8 @@ unsigned int GetNextWorkRequired(const CBlockIndex* pindexLast, const CBlockHead
     if (pindexLast == NULL)
         return nProofOfWorkLimit;
 
+    uint32_t nBits;
+
     // Only change once per difficulty adjustment interval
     if ((pindexLast->nHeight+1) % params.DifficultyAdjustmentInterval() != 0)
     {
@@ -30,23 +32,38 @@ unsigned int GetNextWorkRequired(const CBlockIndex* pindexLast, const CBlockHead
                 return nProofOfWorkLimit;
             else
             {
-                // Return the last non-special-min-difficulty-rules-block
+                // Look back to the last non-special-min-difficulty-rules-block
                 const CBlockIndex* pindex = pindexLast;
                 while (pindex->pprev && pindex->nHeight % params.DifficultyAdjustmentInterval() != 0 && pindex->nBits == nProofOfWorkLimit)
                     pindex = pindex->pprev;
-                return pindex->nBits;
+                nBits = pindex->nBits;
             }
+        } else {
+            nBits = pindexLast->nBits;
         }
-        return pindexLast->nBits;
+    } else {
+        // Go back by what we want to be 14 days worth of blocks
+        int nHeightFirst = pindexLast->nHeight - (params.DifficultyAdjustmentInterval()-1);
+        assert(nHeightFirst >= 0);
+        const CBlockIndex* pindexFirst = pindexLast->GetAncestor(nHeightFirst);
+        assert(pindexFirst);
+
+        nBits = CalculateNextWorkRequired(pindexLast, pindexFirst->GetBlockTime(), params);
     }
 
-    // Go back by what we want to be 14 days worth of blocks
-    int nHeightFirst = pindexLast->nHeight - (params.DifficultyAdjustmentInterval()-1);
-    assert(nHeightFirst >= 0);
-    const CBlockIndex* pindexFirst = pindexLast->GetAncestor(nHeightFirst);
-    assert(pindexFirst);
+    if (params.PowAlgorithmForTime(pblock->nTime) != params.PowAlgorithmForTime(pindexLast->nTime)) {
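+        // Adjust target for PoW change (NOTE(review): the left shift can presumably drop high bits before the powLimit clamp if nPowChangeTargetShift is large -- confirm its range)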
+        arith_uint256 bnNew;
+        bnNew.SetCompact(nBits);
+        bnNew <<= params.nPowChangeTargetShift;
+        const arith_uint256 bnPowLimit = UintToArith256(params.powLimit);
+        if (bnNew > bnPowLimit) {
+            bnNew = bnPowLimit;
+        }
+        nBits = bnNew.GetCompact();
+    }
 
-    return CalculateNextWorkRequired(pindexLast, pindexFirst->GetBlockTime(), params);
+    return nBits;
 }
 
 unsigned int CalculateNextWorkRequired(const CBlockIndex* pindexLast, int64_t nFirstBlockTime, const Consensus::Params& params)
diff --git a/src/primitives/block.cpp b/src/primitives/block.cpp
index 9a979094ccb16..4218b9ae4f5a1 100644
--- a/src/primitives/block.cpp
+++ b/src/primitives/block.cpp
@@ -8,11 +8,56 @@
 #include "hash.h"
 #include "tinyformat.h"
 #include "utilstrencodings.h"
+#include "chainparams.h"
+#include "consensus/params.h"
 #include "crypto/common.h"
+#include "crypto/sph_keccak.h"
+#include "streams.h"
+
+#include <cstdlib>
+
+uint256 CBlockHeader::GetHash(const Consensus::Params& consensusParams) const
+{ // Serialize this header and hash it with the algorithm consensusParams selects for nTime.
+    CDataStream ss(SER_GETHASH, PROTOCOL_VERSION);
+    ss << *this;
+
+    const auto pbegin = (const unsigned char *)&ss.begin()[0]; // first byte of the serialized header
+    uint256 hash; // NOTE(review): presumably zero-initialized, so digests shorter than 256 bits leave the rest zero -- confirm
+
+    const HashAlgorithm algo = consensusParams.PowAlgorithmForTime(nTime); // chosen from the header's own timestamp
+    switch (algo) {
+        case HashAlgorithm::SHA256:
+            CSHA256().Write(pbegin, ss.size()).Finalize((unsigned char*)&hash);
+            break;
+        case HashAlgorithm::SHA256d:
+            CHash256().Write(pbegin, ss.size()).Finalize((unsigned char*)&hash);
+            break;
+        case HashAlgorithm::RIPEMD160:
+            CRIPEMD160().Write(pbegin, ss.size()).Finalize((unsigned char*)&hash);
+            break;
+        case HashAlgorithm::HASH160:
+            CHash160().Write(pbegin, ss.size()).Finalize((unsigned char*)&hash);
+            break;
+        case HashAlgorithm::KECCAK:
+        { // braces scope the sph context declared inside this case
+            sph_keccak256_context ctx_keccak;
+            sph_keccak256_init(&ctx_keccak);
+            sph_keccak256(&ctx_keccak, pbegin, ss.size());
+            sph_keccak256_close(&ctx_keccak, &hash);
+            break;
+        }
+        case HashAlgorithm::NUM_HASH_ALGOS:
+            // Sentinel value, not a selectable algorithm; should be impossible
+            abort();
+    }
+
+    return hash;
+}
 
 uint256 CBlockHeader::GetHash() const
 {
-    return SerializeHash(*this);
+    const Consensus::Params& consensusParams = Params().GetConsensus();
+    return GetHash(consensusParams);
 }
 
 std::string CBlock::ToString() const
diff --git a/src/primitives/block.h b/src/primitives/block.h
index 4c6eb20ad5e3f..aacf4f5abc570 100644
--- a/src/primitives/block.h
+++ b/src/primitives/block.h
@@ -10,6 +10,10 @@
 #include "serialize.h"
 #include "uint256.h"
 
+namespace Consensus {
+    struct Params;
+}
+
 /** Nodes collect new transactions into a block, hash them into a hash tree,
  * and scan through nonce values to make the block's hash satisfy proof-of-work
  * requirements.  When they solve the proof-of-work, they broadcast the block
@@ -60,6 +64,7 @@ class CBlockHeader
         return (nBits == 0);
     }
 
+    uint256 GetHash(const Consensus::Params&) const;
     uint256 GetHash() const;
 
     int64_t GetBlockTime() const
diff --git a/src/rpc/mining.cpp b/src/rpc/mining.cpp
index 33e234a95efbf..eb26bf0eb72a2 100644
--- a/src/rpc/mining.cpp
+++ b/src/rpc/mining.cpp
@@ -678,7 +678,7 @@ UniValue getblocktemplate(const JSONRPCRequest& request)
     result.push_back(Pair("coinbasevalue", (int64_t)pblock->vtx[0]->vout[0].nValue));
     result.push_back(Pair("longpollid", chainActive.Tip()->GetBlockHash().GetHex() + i64tostr(nTransactionsUpdatedLast)));
     result.push_back(Pair("target", hashTarget.GetHex()));
-    result.push_back(Pair("mintime", (int64_t)pindexPrev->GetMedianTimePast()+1));
+    result.push_back(Pair("mintime", (int64_t)pindexPrev->GetEarliestNextBlockTime(consensusParams)));
     result.push_back(Pair("mutable", aMutable));
     result.push_back(Pair("noncerange", "00000000ffffffff"));
     int64_t nSigOpLimit = MAX_BLOCK_SIGOPS_COST;
diff --git a/src/test/miner_tests.cpp b/src/test/miner_tests.cpp
index f856d8a91a704..33e193d21eaee 100644
--- a/src/test/miner_tests.cpp
+++ b/src/test/miner_tests.cpp
@@ -6,6 +6,7 @@
 #include "coins.h"
 #include "consensus/consensus.h"
 #include "consensus/merkle.h"
+#include "consensus/params.h"
 #include "consensus/validation.h"
 #include "validation.h"
 #include "miner.h"
@@ -187,6 +188,7 @@ BOOST_AUTO_TEST_CASE(CreateNewBlock_validity)
 {
     // Note that by default, these tests run with size accounting enabled.
     const CChainParams& chainparams = Params(CBaseChainParams::MAIN);
+    const Consensus::Params& consensusParams = chainparams.GetConsensus();
     CScript scriptPubKey = CScript() << ParseHex("04678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb649f6bc3f4cef38c4f35504e51ec112de5c384df7ba0b8d578a4c702b6bf11d5f") << OP_CHECKSIG;
     std::unique_ptr<CBlockTemplate> pblocktemplate;
     CMutableTransaction tx,tx2;
@@ -211,7 +213,7 @@ BOOST_AUTO_TEST_CASE(CreateNewBlock_validity)
     {
         CBlock *pblock = &pblocktemplate->block; // pointer for convenience
         pblock->nVersion = 1;
-        pblock->nTime = chainActive.Tip()->GetMedianTimePast()+1;
+        pblock->nTime = chainActive.Tip()->GetEarliestNextBlockTime(consensusParams);
         CMutableTransaction txCoinbase(*pblock->vtx[0]);
         txCoinbase.nVersion = 1;
         txCoinbase.vin[0].scriptSig = CScript();
@@ -393,7 +395,7 @@ BOOST_AUTO_TEST_CASE(CreateNewBlock_validity)
     }
 
     // non-final txs in mempool
-    SetMockTime(chainActive.Tip()->GetMedianTimePast()+1);
+    SetMockTime(chainActive.Tip()->GetEarliestNextBlockTime(consensusParams));
     int flags = LOCKTIME_VERIFY_SEQUENCE|LOCKTIME_MEDIAN_TIME_PAST;
     // height map
     std::vector<int> prevheights;
diff --git a/src/test/skiplist_tests.cpp b/src/test/skiplist_tests.cpp
index 0b2fe0ef9db1e..db12c5cf791da 100644
--- a/src/test/skiplist_tests.cpp
+++ b/src/test/skiplist_tests.cpp
@@ -3,6 +3,8 @@
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #include "chain.h"
+#include "chainparams.h"
+#include "consensus/params.h"
 #include "util.h"
 #include "test/test_bitcoin.h"
 #include "test/test_random.h"
@@ -102,6 +104,7 @@ BOOST_AUTO_TEST_CASE(getlocator_test)
 
 BOOST_AUTO_TEST_CASE(findearliestatleast_test)
 {
+    const Consensus::Params& consensusParams = Params(CBaseChainParams::MAIN).GetConsensus();
     std::vector<uint256> vHashMain(100000);
     std::vector<CBlockIndex> vBlocksMain(100000);
     for (unsigned int i=0; i<vBlocksMain.size(); i++) {
@@ -115,7 +118,7 @@ BOOST_AUTO_TEST_CASE(findearliestatleast_test)
             vBlocksMain[i].nTimeMax = i;
         } else {
             // randomly choose something in the range [MTP, MTP*2]
-            int64_t medianTimePast = vBlocksMain[i].GetMedianTimePast();
+            int64_t medianTimePast = vBlocksMain[i].GetEarliestNextBlockTime(consensusParams);
             int r = insecure_rand() % medianTimePast;
             vBlocksMain[i].nTime = r + medianTimePast;
             vBlocksMain[i].nTimeMax = std::max(vBlocksMain[i].nTime, vBlocksMain[i-1].nTimeMax);
diff --git a/src/validation.cpp b/src/validation.cpp
index c9135c442b1a4..55ce102155291 100644
--- a/src/validation.cpp
+++ b/src/validation.cpp
@@ -2981,8 +2981,12 @@ bool ContextualCheckBlockHeader(const CBlockHeader& block, CValidationState& sta
     if (block.nBits != GetNextWorkRequired(pindexPrev, &block, consensusParams))
         return state.DoS(100, false, REJECT_INVALID, "bad-diffbits", false, "incorrect proof of work");
 
+    if (consensusParams.PowAlgorithmForTime(block.GetBlockTime()) != consensusParams.PowAlgorithmForTime(pindexPrev->GetBlockTime()) && block.GetBlockTime() < pindexPrev->GetBlockTime()) {
+        return state.Invalid(false, REJECT_INVALID, "pow-reversed", "cannot reverse PoW change");
+    }
+
     // Check timestamp against prev
-    if (block.GetBlockTime() <= pindexPrev->GetMedianTimePast())
+    if (block.GetBlockTime() < pindexPrev->GetEarliestNextBlockTime(consensusParams))
         return state.Invalid(false, REJECT_INVALID, "time-too-old", "block's timestamp is too early");
 
     // Check timestamp