-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implemented Clojures persistent vectors in Nim
- Loading branch information
0 parents
commit d639830
Showing
3 changed files
with
329 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Persistent Vector | ||
This is an implementation of Clojure's persistent vectors in Nim. The default branching factor is 32, as in Clojure, but it can be changed with the compile-time define `-d:persvectorbits=n`, where n is the number of index bits used per tree level, so the branching factor is 2^n (n defaults to 5, giving 32-way branching). | ||
|
||
For more information see the vector.nim file and its doc-strings. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
## This is a file which shows most of the implemented features of the | ||
## persistent vector data structure. It was created to test the algorithms | ||
## during the implementation | ||
|
||
import vector | ||
|
||
# Start from an empty vector of ints and keep every version that gets
# produced in a sequence, so older versions stay reachable.
let vec = initVector[int]()
var vecs = @[vec]

# Append the numbers 0..64 (inclusive), storing each intermediate vector.
# Note that this was tested with a node size of 2 or 4 to increase the
# branching; with the default node size this few elements barely branch.
#for i in 0..1_000_000:
for i in 0..64:
  let grown = vecs[^1].add(i)
  vecs.add grown

# Two more versions, each with the last element removed
vecs.add(vecs[^1].delete())
vecs.add(vecs[^1].delete())

# One version with a single field changed, based on an older version
let base = vecs[vecs.high - 5]
vecs.add(base.update(10, 100))

# Slicing a persistent vector gives back a regular sequence
echo vecs[^1][20 .. 30]

# Build a completely new vector from a sequence of strings ...
let strVec = @["Hello", "world!", "How", "is", "it", "going?", "Persistent", "vectors", "are", "cool!"].toPersistentVector
# ... and derive an updated version of it
let neatVec = strVec.update(strVec.len - 1, "neat!")

for nvec in vecs:
  echo "len: ", nvec.len
  echo $nvec
  echo "---"

echo "len: ", strVec.len
echo $strVec
echo "---"

echo "len: ", neatVec.len
echo $neatVec

#echo getOccupiedMem()
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,279 @@ | ||
## This module implements Clojure's persistent vectors with tail optimisation. | ||
## For more information about those please see the blogpost series: | ||
## http://hypirion.com/musings/understanding-persistent-vector-pt-1 which | ||
## was used as a reference for this implementation. | ||
## | ||
## Persistent, or immutable, data structures are important for many functional | ||
## workloads. Instead of changing the data within them they have a structure | ||
## which shares nodes with similar data so a minimal amount of data needs to | ||
## be copied. This is not, however, only useful for functional programming; it also has | ||
## benefits for things like asynchronous programming and even systems which | ||
## store their state to provide things like an undo feature. | ||
|
||
# Number of index bits consumed per tree level. Can be overridden at compile
# time with `-d:persvectorbits=n`; the default of 5 gives 32-way branching.
const persvectorbits {.intdefine.}: int = 5

# A bit count below 1 would make WIDTH <= 1 and would keep `level -= BITS`
# from ever reaching zero in the tree-walking loops, and a bit count that
# does not fit in an int would overflow `1 shl BITS`. Reject both up front.
when persvectorbits < 1 or persvectorbits >= sizeof(int) * 8 - 1:
  {.error: "persvectorbits must be at least 1 and smaller than the bit width of int".}

const
  # These constants define how the node sharing is done. For Clojure BITS is
  # set to 5, which gives 32 elements per node. Fewer bits means less copied
  # data but a deeper tree and more work in maintaining it. More bits means
  # more copied data but a shallower tree and less work. 32 is seen to be a
  # good compromise between memory and computational performance.
  BITS = persvectorbits  # index bits per tree level
  WIDTH = 1 shl BITS     # number of slots in every node
  MASK = WIDTH - 1       # extracts one level's slot index from a key
|
||
|
||
type
  NodeKind = enum
    ## Tells the two node shapes of the tree apart.
    leaf, branch

  VectorNodeObj[T] = object
    ## Payload of a tree node; which field exists depends on `kind`.
    case kind: NodeKind
    of leaf:
      data: seq[T]                  ## the elements stored in this leaf
    of branch:
      children: seq[VectorNode[T]]  ## the nodes one level further down

  VectorNode[T] = ref VectorNodeObj[T]
    ## Reference to a node, so several vectors can point at the same node.

  PersistentVector*[T] = ref object
    ## A persistent vector: a tree of nodes plus a separate tail.
    tail: seq[T]         ## trailing elements that are not stored in the tree
    size: int            ## total number of elements (tree and tail)
    shifts: int          ## bit shift applied to a key at the root of the tree
    tree: VectorNode[T]  ## root node; nil while no leaf has been pushed
|
||
proc copyRef[T](theSeq: seq[T]): seq[T] {.noSideEffect.} =
  ## Returns `theSeq` for storage in a node or tail without an element-wise
  ## copy where the compiler allows it. The callers in this module never
  ## modify the returned sequence in place.
  when declared(shallowCopy):
    shallowCopy(result, theSeq)
  else:
    # `shallowCopy` is not available with every memory management mode;
    # a plain assignment yields the same contents.
    result = theSeq
|
||
|
||
proc initVector*[T](): PersistentVector[T] {.noSideEffect.} =
  ## Creates an empty persistent vector: no tree, zero size and an empty
  ## (but initialised) tail.
  result = PersistentVector[T](tail: newSeq[T]())
|
||
proc add*[T](vec: PersistentVector[T], elem: T): PersistentVector[T] {.noSideEffect.} =
  ## Returns a new persistent vector with the element `elem` inserted at the
  ## end. `vec` is not modified; tree nodes that do not lie on the path to the
  ## insertion point are shared between `vec` and the result.
  new result
  result.size = vec.size + 1
  result.shifts = vec.shifts
  if vec.tail.len != WIDTH:
    # Room left in the tail: copy it, append, and share the whole tree.
    # Plain seq assignment copies the elements themselves; unlike `deepCopy`
    # it does not clone the objects that ref-typed elements point to, which
    # matches what `update` and `delete` do.
    result.tail = vec.tail
    result.tail.add elem
    result.tree = vec.tree
  else:
    # The tail is full: it becomes a leaf of the tree and a fresh tail
    # holding only `elem` is started.
    if vec.tree == nil:
      # First leaf ever: the leaf itself is the tree.
      result.tree = VectorNode[T](kind: leaf, data: copyRef(vec.tail))
    else:
      if vec.tree.kind == leaf:
        # The tree was a single leaf: put a branch above the old leaf and
        # the new one.
        result.tree = VectorNode[T](kind: branch, children: @[vec.tree])
        var newChild = VectorNode[T](kind: leaf, data: copyRef(vec.tail))
        result.tree.children.add newChild
        result.shifts = BITS
      else:
        # Strip trailing zero digits (base WIDTH) from the number of
        # elements in the tree. If exactly 1 remains, that number is a
        # power of WIDTH.
        var n = vec.size - vec.tail.len
        while (n and (WIDTH - 1)) == 0:
          n = n shr BITS
        if n == 1:
          # Grow the tree by one level: new root with the old tree as first
          # child and a chain of single-child branches down to the new leaf.
          result.tree = VectorNode[T](kind: branch, children: @[vec.tree])
          var
            level = vec.shifts
            node = result.tree
          result.shifts = 0
          while level > 0:
            var nnode = VectorNode[T](kind: branch, children: @[])
            node.children.add nnode
            node = nnode
            level -= BITS
            result.shifts += BITS
          # One extra level for the new root.
          result.shifts += BITS
          node.children.add VectorNode[T](kind: leaf, data: copyRef(vec.tail))
        else:
          # Copy the path from the root towards the insertion point; every
          # node off that path is shared with `vec`.
          result.tree = VectorNode[T](kind: branch)
          result.tree.children = vec.tree.children[0 .. ^1]
          var
            node = result.tree
            level = vec.shifts
          while level > 0:
            let index = ((vec.size - vec.tail.len) shr level) and MASK
            if node.children.high < index:
              # No child at this slot yet: build the missing branches down
              # to leaf level and hang the new leaf there.
              while level > BITS:
                level -= BITS
                node.children.add VectorNode[T](kind: branch, children: @[])
                node = node.children[node.children.high]
              node.children.add VectorNode[T](kind: leaf, data: copyRef(vec.tail))
              break
            else:
              # The child exists: replace it with a copy and descend.
              let oldNode = node.children[index]
              node.children[index] = VectorNode[T](kind: branch)
              node = node.children[index]
              node.children = oldNode.children[0 .. ^1]
              level -= BITS

    result.tail = @[elem]
|
||
proc update*[T](vec: PersistentVector[T], key: int, elem: T): PersistentVector[T] {.noSideEffect.} =
  ## Returns a new persistent vector in which the element at index `key` is
  ## replaced by `elem`. `vec` is not modified: either the tail or the nodes
  ## on the path to `key` are copied, everything else is shared.
  let treeSize = vec.size - vec.tail.len

  if key >= treeSize:
    # The element lives in the tail: copy the tail, share the tree.
    result = PersistentVector[T](size: vec.size, shifts: vec.shifts,
                                 tree: vec.tree, tail: vec.tail)
    result.tail[key - treeSize] = elem
    return

  # The element lives in the tree: keep the tail contents, copy the path.
  result = PersistentVector[T](size: vec.size, shifts: vec.shifts,
                               tail: vec.tail)
  case vec.tree.kind
  of leaf:
    # The tree is a single leaf, so `key` indexes it directly.
    var patched = vec.tree.data
    patched[key] = elem
    result.tree = VectorNode[T](kind: leaf, data: patched)
  of branch:
    var cursor = VectorNode[T](kind: branch, children: vec.tree.children)
    result.tree = cursor
    var shift = vec.shifts
    while shift > 0:
      let
        slot = (key shr shift) and MASK
        source = cursor.children[slot]
        # Copy the node on the path so the original stays untouched.
        clone =
          case source.kind
          of branch: VectorNode[T](kind: branch, children: source.children)
          of leaf: VectorNode[T](kind: leaf, data: source.data)
      cursor.children[slot] = clone
      cursor = clone
      shift -= BITS
    # `cursor` is now the copied leaf that holds `key`.
    cursor.data[key and MASK] = elem
|
||
proc delete*[T](vec: PersistentVector[T]): PersistentVector[T] {.noSideEffect.} =
  ## Returns a new persistent vector with the last element of the given
  ## vector missing. `vec` is not modified. Deleting from an empty vector is
  ## a programming error and fails an assertion.
  doAssert vec.size > 0, "cannot delete from an empty persistent vector"
  new result
  result.size = vec.size - 1
  # True when there is no leaf to promote into the tail: either no tree at
  # all, or a branch without children.
  let treeIsEmpty = vec.tree == nil or
    (vec.tree.kind == branch and vec.tree.children.len == 0)
  if vec.tail.len > 1:
    # More than one element in the tail: drop its last one, share the tree.
    result.shifts = vec.shifts
    result.tree = vec.tree
    result.tail = vec.tail[0 .. ^2]
  elif treeIsEmpty:
    # The only element lived in the tail; the result is an empty vector.
    result.tail = newSeq[T]()
  else:
    # The tail runs empty: the right-most leaf of the tree becomes the tail.
    if vec.tree.kind == leaf:
      result.tree = nil
      result.tail = copyRef(vec.tree.data)
    else:
      var n = result.size - WIDTH
      while (n and (WIDTH - 1)) == 0:
        n = n shr BITS
      # If new size of tree is power of WIDTH, the right branch of the tree
      # only consists of one node to be promoted.
      if n == 1:
        # Drop the root: its first child is the new tree and the single leaf
        # under its second child is the new tail.
        result.tree = vec.tree.children[0]
        var node = vec.tree.children[1]
        while node.kind != leaf:
          node = node.children[0]
        result.tail = copyRef(node.data)
        result.shifts = vec.shifts - BITS
      else:
        result.shifts = vec.shifts
        var vector = result
        proc promoteRight[T](node: VectorNode[T]): VectorNode[T] =
          # Copies the right-most path without its last leaf. The leaf's data
          # is stored as the tail of `vector`; nil is returned for a node
          # that would be left without children.
          if node.kind == branch:
            let newNode = promoteRight(node.children[node.children.high])
            if newNode == nil and node.children.len == 1:
              return nil
            result = VectorNode[T](kind: branch, children: node.children[0 .. ^2])
            if newNode != nil:
              result.children.add newNode
          else:
            vector.tail = copyRef(node.data)
            result = nil
        result.tree = promoteRight(vec.tree)
|
||
proc toPersistentVector*[T](s: seq[T]): PersistentVector[T] =
  ## Returns a new persistent vector that contains all elements in the passed
  ## sequence. This copies all the data from the sequence.
  ##
  ## The result has the same layout as a vector built by calling `add` once
  ## per element: the last 1..WIDTH elements go into the tail (none for an
  ## empty sequence) and there is no tree unless at least one full leaf
  ## precedes the tail.
  let
    # Never leave the tail empty for a non-empty input: `delete` relies on
    # the tail holding the last element.
    tailLen = if s.len == 0: 0 else: ((s.len - 1) and MASK) + 1
    treeLen = s.len - tailLen
  result = PersistentVector[T](size: s.len, tail: s[treeLen .. ^1])
  if treeLen == 0:
    # Everything fits in the tail; `tree` stays nil and `shifts` stays 0.
    return

  # Cut the tree part into full leaves.
  var nodes = newSeq[VectorNode[T]](treeLen shr BITS)
  for i in 0 .. nodes.high:
    nodes[i] = VectorNode[T](kind: leaf, data: s[WIDTH*i .. WIDTH*(i+1) - 1])

  # Group nodes under parent branches until one level fits under one root.
  while nodes.len > WIDTH:
    result.shifts += BITS
    var parents = newSeq[VectorNode[T]]((nodes.len + MASK) shr BITS)
    for j in 0 .. parents.high:
      # The last parent may get fewer than WIDTH children.
      let last = min(WIDTH*(j+1) - 1, nodes.high)
      parents[j] = VectorNode[T](kind: branch, children: nodes[WIDTH*j .. last])
    nodes = parents

  if nodes.len == 1:
    # A single leaf is the tree itself.
    result.tree = nodes[0]
  else:
    result.shifts += BITS
    result.tree = VectorNode[T](kind: branch, children: nodes)
|
||
proc `[]`*[T](vec: PersistentVector[T], key: int): T {.inline, noSideEffect.} =
  ## Access operator for persistent vectors: returns the element stored at
  ## index `key`.
  let treeSize = vec.size - vec.tail.len
  if key < treeSize:
    # Walk down from the root, picking one child per level from the bits of
    # `key`, until the leaf is reached.
    var cursor = vec.tree
    for shift in countdown(vec.shifts, 1, BITS):
      cursor = cursor.children[(key shr shift) and MASK]
    result = cursor.data[key and MASK]
  else:
    # Indices past the tree part are served from the tail.
    result = vec.tail[key - treeSize]
|
||
proc `[]`*[T](vec: PersistentVector[T], slice: Slice[int]): seq[T] {.inline, noSideEffect.} =
  ## Optimised slice operator for persistent vectors, returns a sequence
  ## holding the elements at indices `slice.a .. slice.b` (both inclusive).
  ## Each leaf touched by the slice is looked up once instead of once per
  ## element.
  let treeSize = vec.size - vec.tail.len
  if slice.a >= treeSize:
    # The whole slice lies in the tail.
    return vec.tail[slice.a - treeSize .. slice.b - treeSize]

  result = newSeq[T](slice.b - slice.a + 1)
  # One past the last index that has to be read from the tree.
  let treeEnd = min(slice.b + 1, treeSize)
  var i = slice.a
  while i < treeEnd:
    # Find the leaf that holds index `i`.
    var node = vec.tree
    for level in countdown(vec.shifts, 1, BITS):
      node = node.children[(i shr level) and MASK]
    # Copy from this leaf up to its end or the end of the tree part of the
    # slice, whichever comes first.
    let
      first = i and MASK
      last = min(node.data.high, first + (treeEnd - i) - 1)
    for j in first .. last:
      result[i - slice.a] = node.data[j]
      inc i
  # Whatever is left of the slice comes from the tail.
  for k in i .. slice.b:
    result[k - slice.a] = vec.tail[k - treeSize]
|
||
iterator items*[T](vec: PersistentVector[T]): T {.noSideEffect.} =
  ## Optimised iterator for PersistentVector (could be optimised further).
  ## Yields the elements of the tree leaf by leaf, then those of the tail.
  if vec.tree != nil:
    let treeSize = vec.size - vec.tail.len
    var offset = 0
    while offset < treeSize:
      # Locate the leaf that starts at index `offset` ...
      var cursor = vec.tree
      for shift in countdown(vec.shifts, 1, BITS):
        cursor = cursor.children[(offset shr shift) and MASK]
      # ... yield all of it and jump to the start of the next leaf.
      for elem in cursor.data:
        yield elem
      offset += WIDTH
  for elem in vec.tail:
    yield elem
|
||
proc len*[T](vec: PersistentVector[T]): int {.noSideEffect.} =
  ## Number of elements in the persistent vector. The count is stored in the
  ## vector, so nothing is computed here.
  result = vec.size
|
||
proc high*[T](vec: PersistentVector[T]): int {.noSideEffect.} =
  ## Highest valid index of the persistent vector, i.e. one less than its
  ## stored size.
  result = vec.size - 1
|
||
proc `$`*[T](vec: PersistentVector[T]): string {.noSideEffect.} =
  ## Returns a string representation of the elements in the persistent
  ## vector: all elements are collected into a sequence, which is then
  ## converted to a string.
  let everything = vec[0 .. vec.high]
  result = $everything
|
||
proc `$`*(node: VectorNode): string {.noSideEffect.} =
  ## Returns a string representation of a vector node, for debugging. A leaf
  ## is printed as "l", a branch as "b", each followed by a number obtained
  ## by casting (the leaf's data, or the branch node itself) to int and then
  ## by the node's contents.
  case node.kind
  of leaf:
    result = "l" & $cast[int](node.data) & ": " & $node.data
  of branch:
    result = "b" & $cast[int](node) & ": " & $node.children
|