From 203bd409325fbd6254ce0776e2bc1bdc3e2f7000 Mon Sep 17 00:00:00 2001 From: Mark Nadal Date: Tue, 16 Jan 2024 21:08:14 -0800 Subject: [PATCH] cleaned up Book results & sorting & caching --- gun.js | 62 ++++++++++++++++++++++-------------------------- rad.js | 40 +++++++++---------------------- src/book.js | 62 ++++++++++++++++++++++-------------------------- test/rad/book.js | 19 +++++++++++++++ 4 files changed, 88 insertions(+), 95 deletions(-) diff --git a/gun.js b/gun.js index 2328d01bf..69171f470 100644 --- a/gun.js +++ b/gun.js @@ -183,7 +183,7 @@ } function got(word, page){ var b = page.book, l, has, a, i; - if(l = from(page)){ has = l[got.i = i = spot(word, l, B.decode)]; } // TODO: POTENTIAL BUG! This assumes that each word on a page uses the same serializer/formatter/structure. + if(l = from(page)){ has = l[got.i = i = spot(word, l, B.decode)]; } // TODO: POTENTIAL BUG! This assumes that each word on a page uses the same serializer/formatter/structure. // TOOD: BUG!!! Not actually, but if we want to do non-exact radix-like closest-word lookups on a page, we need to check limbo & potentially sort first. // parseless may return -1 from actual value, so we may need to test both. // TODO: Double check? I think this is correct. if(has && word == has.word){ return (b.all[word] = has).is } if('string' != typeof has){ has = l[got.i = i+=1] } @@ -208,14 +208,14 @@ function from(a, t, l){ if('string' != typeof a.from){ return a.from } - //(l = a.from = (t = a.from||'').substring(1, t.length-1).split(t[0])).toString = join; // slot - (l = a.from = slot(t = t||a.from||'')).toString = join; + //(l = a.from = (t = a.from||'').substring(1, t.length-1).split(t[0])); // slot + (l = a.from = slot(t = t||a.from||'')); return l; } function list(each){ each = each || function(x){return x} - // TODO: BUG!!! in limbo items need to get situated before calling this, if there are any. (obviously, we shouldn't do it again if limbo has previously been sorted). - var i = 0, l = from(this)||[], w, r = [], p = this.book.parse || function(){}; - while(w = l[i++]){ r.push(each(slot(w)[1],p(w)||w,this)) } + var i = 0, l = sort(this), w, r = [], p = this.book.parse || function(){}; + //while(w = l[i++]){ r.push(each(slot(w)[1], p(w)||w, this)) } + while(w = l[i++]){ r.push(each(this.get(w = w.word||p(w)||w), w, this)) } // TODO: BUG! PERF? return r; } @@ -230,7 +230,7 @@ // MUST be an insert: has = b.all[word] = {word: word, is: is, page: page, substring: subt, toString: tot}; page.first = (page.first < word)? page.first : word; - if(!page.limbo){ (page.limbo = []).toString = join } + if(!page.limbo){ (page.limbo = []) } page.limbo.push(has); b(word, is); page.size += size(word) + size(is); @@ -240,24 +240,18 @@ function split(p, b){ // TODO: use closest hash instead of half. //console.time(); - // TODO: BUG???? May need to do a SORTED merge with FROM. - var i = 0, L = p.limbo, tmp; - //while(tmp = L[i++]){ } - var L = p.limbo = sort(p.limbo), l = L.length, i = l/2 >> 0, j = i, half = L[j], tmp; + var L = sort(p), l = L.length, i = l/2 >> 0, j = i, half = L[j], tmp; //console.timeEnd(); - var next = {limbo: [], first: half.substring(), size: 0, substring: sub, toString: to, book: b, get: b, read: list}, nl = next.limbo; - nl.toString = join; + var next = {first: half.substring(), size: 0, substring: sub, toString: to, book: b, get: b, read: list}, f = next.from = []; //console.time(); while(tmp = L[i++]){ - nl.push(tmp); + f.push(tmp); next.size += (tmp.is||'').length||1; tmp.page = next; } - //console.timeEnd(); - //console.time(); - p.limbo = p.limbo.slice(0, j); + //console.timeEnd(); console.time(); + p.from = p.from.slice(0, j); p.size -= next.size; - p.sort = 0; b.list.splice(spot(next.first, b.list)+1, 0, next); // TODO: BUG! Make sure next.first is decoded text. // TODO: BUG! spot may need parse too? //console.timeEnd(); if(b.split){ b.split(next, p) } @@ -285,27 +279,29 @@ } function sub(i,j){ return (this.first||this.word||B.decode((from(this)||'')[0]||'')).substring(i,j) } function to(){ return this.text = this.text || text(this) } - function join(){ return this.join('|') } - function text(p){ var l = p.limbo; // TODO: BUG??? Shouldn't any stringify cause limbo to be reset? - if(!l){ return (typeof p.from == 'string')? (p.from||'')+'' : '|'+p.from+'|' } - if(!p.from){ return p.limbo = null, '|'+((l && sort(l).join('|'))||'')+'|' } // TODO: p.limbo should be reset each time we "flush". - return '|'+mix(l, from(p), p).join('|')+'|'; // commenting out this sub-portion of code fixed a more basic test, but will probably cause a bug with a FROM + MEMORY. - } - function mix(l, f, p){ // TODO: IMPROVE PERFORMANCE!!!! l[j] = i is 5X+ faster than .push( - var j = 0, i; + function text(p){ // PERF: read->[*] : text->"*" no edit waste 1 time perf. + if(p.limbo){ sort(p) } // TODO: BUG? Empty page meaning? undef, '', '||'? + return ('string' == typeof p.from)? p.from : '|'+(p.from||[]).join('|')+'|'; + } + + function sort(p, l){ + var f = p.from = ('string' == typeof p.from)? slot(p.from) : p.from||[]; + if(!(l = l || p.limbo)){ return f } + return mix(p).sort(function(a,b){ + return (a.word||B.decode(''+a)) < (b.word||B.decode(''+b))? -1:1; + }); + } + function mix(p, l){ // TODO: IMPROVE PERFORMANCE!!!! l[j] = i is 5X+ faster than .push( + l = l || p.limbo || []; p.limbo = null; + var j = 0, i, f = p.from; while(i = l[j++]){ if(got(i.word, p)){ - f[got.i] = i; + f[got.i] = i; // TODO: Trick: allow for a GUN'S HAM CRDT hook here. } else { f.push(i); } } - return sort(f); - } - function sort(l){ //return l.sort(); - return l.sort(function(a,b){ - return (a.word||B.decode(''+a)) < (b.word||B.decode(''+b))? -1:1; - }); + return f; } B.encode = function(d, s, u){ s = s || "|"; u = u || String.fromCharCode(32); diff --git a/rad.js b/rad.js index e6c9fa936..18f6909f3 100644 --- a/rad.js +++ b/rad.js @@ -6,7 +6,7 @@ var log = opt.log || nope; var has = (sT.RAD.has || (sT.RAD.has = {}))[opt.file]; - if(has){ return has } + if(has){ return has } // TODO: BUG? Not reuses same instance? var r = function rad(word, is, reply){ r.word = word; if(!b){ start(word, is, reply); return r } if(is === undefined || 'function' == typeof is){ // THIS IS A READ: @@ -35,29 +35,18 @@ }) } - async function write(word, reply){ - log('write() word', word); + function write(word, reply){ var p = b.page(word), tmp; - if(tmp = p.saving){ reply && tmp.push(reply); return } p.saving = [reply]; - var S = +new Date; log(" writing", p.substring(), 'since last', S - p.saved, RAD.c, 'records', env.count++, 'mid-swap.'); + if(tmp=p.saving){(reply||!tmp.length)&&(p.saving=tmp.concat(reply));return} // TODO: PERF! Rogowski points out concat is slow. BUG??? I HAVE NO clue how/why this if statement being called from recursion yet not set to 0. + p.saving = ('function' == typeof reply)? [reply] : reply || []; get(p, function(err, disk){ - if(err){ log("ERR! in write() get() cb ", err); return } - log(' get() - p.saving ', (p.saving || []).length); - if(p.from && disk){ - log(" get() merge: p.from ", p.toString().slice(0, 40), " disk.length", disk?.length || 0); - } + if(err){ log("ERR! in write() get() cb ", err); return } // TODO: BUG!!! Unhandled, no callbacks called. p.from = disk || p.from; - // p.list = p.text = p.from = 0; - // p.first = p.first.word || p.first; tmp = p.saving; p.saving = []; - put(p, '' + p, function(err, ok){ - env.count--; p.saved = +new Date; log(" ...wrote %d bytes in %dms", ('' + p).length, (p.saved = +new Date) - S); - // TODO: BUG: Confirmed! Only calls back first. Need to fix + use perf hack from old RAD. + put(p, ''+p, function(err, ok){ sT.each(tmp, function(cb){ cb && cb(err, ok) }); - if(!p.saving.length){ p.saving = 0; return; } //p.saving = 0; // what? - // log({ tmp }); - console.log("hm?", word, reply+''); - write(word, reply); + tmp = p.saving; p.saving = 0; + if(tmp.length){ write(word, tmp) } }); }, p); } @@ -71,13 +60,13 @@ function get(file, cb){ var tmp; if(!file){ return } // TODO: HANDLE ERROR!! - if(file.from){ cb(null, file.from); return } // IS THIS LINE SAFE? ADD TESTS! + if(file.from){ cb(null, file.from); return } if(b&&1==b.list.length){ file.first = (file.first < '!')? file.first : '!'; } // TODO: BUG!!!! This cleanly makes for a common first file, but SAVING INVISIBLE ASCII KEYS IS COMPLETELY UNTESTED and guaranteed to have bugs/corruption issues. if(tmp = put[file = fname(file)]){ cb(u, tmp.data); return } if(tmp = get[file]){ tmp.push(cb); return } get[file] = [cb]; RAD.get(file, function(err, data){ tmp = get[file]||''; delete get[file]; - var i = -1, f; while (f = tmp[++i]){ f(err, data) } // TODO: BUG! CPU SCHEDULE? + sT.each(tmp, function(cb){ cb && cb(err, data) }); }, opt); }; @@ -104,7 +93,6 @@ return t; } b.split = function(next, page){ - log("SPLIT!!!!", b.list.length); put(' ', '' + b.list, function(err, ok){ if(err){ console.log("ERR!"); return } // ?? @@ -117,12 +105,6 @@ //function fname(p){ return opt.file + '/' + ename(p.substring()) } function fname(p){ return ename(p.substring()) } - - function valid(word, is, reply){ - if(is !== is){ reply(word +" cannot be NaN!"); return } - return true; - } - function valid(word, is, reply){ if(is !== is){ reply(word +" cannot be NaN!"); return } return true; @@ -210,7 +192,7 @@ cb(401) } RAD.get = async function(file, cb, opt){ get && get(file, cb, opt); - var t = (await (await fetch('http://localhost:8766/gun/1data/'+file)).text()); + var t = (await (await fetch('http://localhost:8765/gun/authorsData/'+file)).text()); if('404' == t){ cb(); return } cb(null, t); } diff --git a/src/book.js b/src/book.js index 415afaa1a..f339c30f1 100644 --- a/src/book.js +++ b/src/book.js @@ -45,7 +45,7 @@ function get(word){ } function got(word, page){ var b = page.book, l, has, a, i; - if(l = from(page)){ has = l[got.i = i = spot(word, l, B.decode)]; } // TODO: POTENTIAL BUG! This assumes that each word on a page uses the same serializer/formatter/structure. + if(l = from(page)){ has = l[got.i = i = spot(word, l, B.decode)]; } // TODO: POTENTIAL BUG! This assumes that each word on a page uses the same serializer/formatter/structure. // TOOD: BUG!!! Not actually, but if we want to do non-exact radix-like closest-word lookups on a page, we need to check limbo & potentially sort first. // parseless may return -1 from actual value, so we may need to test both. // TODO: Double check? I think this is correct. if(has && word == has.word){ return (b.all[word] = has).is } if('string' != typeof has){ has = l[got.i = i+=1] } @@ -70,14 +70,14 @@ function spot(word, sorted, parse){ parse = parse || spot.no || (spot.no = funct function from(a, t, l){ if('string' != typeof a.from){ return a.from } - //(l = a.from = (t = a.from||'').substring(1, t.length-1).split(t[0])).toString = join; // slot - (l = a.from = slot(t = t||a.from||'')).toString = join; + //(l = a.from = (t = a.from||'').substring(1, t.length-1).split(t[0])); // slot + (l = a.from = slot(t = t||a.from||'')); return l; } function list(each){ each = each || function(x){return x} - // TODO: BUG!!! in limbo items need to get situated before calling this, if there are any. (obviously, we shouldn't do it again if limbo has previously been sorted). - var i = 0, l = from(this)||[], w, r = [], p = this.book.parse || function(){}; - while(w = l[i++]){ r.push(each(slot(w)[1],p(w)||w,this)) } + var i = 0, l = sort(this), w, r = [], p = this.book.parse || function(){}; + //while(w = l[i++]){ r.push(each(slot(w)[1], p(w)||w, this)) } + while(w = l[i++]){ r.push(each(this.get(w = w.word||p(w)||w), w, this)) } // TODO: BUG! PERF? return r; } @@ -92,7 +92,7 @@ function set(word, is){ // MUST be an insert: has = b.all[word] = {word: word, is: is, page: page, substring: subt, toString: tot}; page.first = (page.first < word)? page.first : word; - if(!page.limbo){ (page.limbo = []).toString = join } + if(!page.limbo){ (page.limbo = []) } page.limbo.push(has); b(word, is); page.size += size(word) + size(is); @@ -102,24 +102,18 @@ function set(word, is){ function split(p, b){ // TODO: use closest hash instead of half. //console.time(); - // TODO: BUG???? May need to do a SORTED merge with FROM. - var i = 0, L = p.limbo, tmp; - //while(tmp = L[i++]){ } - var L = p.limbo = sort(p.limbo), l = L.length, i = l/2 >> 0, j = i, half = L[j], tmp; + var L = sort(p), l = L.length, i = l/2 >> 0, j = i, half = L[j], tmp; //console.timeEnd(); - var next = {limbo: [], first: half.substring(), size: 0, substring: sub, toString: to, book: b, get: b, read: list}, nl = next.limbo; - nl.toString = join; + var next = {first: half.substring(), size: 0, substring: sub, toString: to, book: b, get: b, read: list}, f = next.from = []; //console.time(); while(tmp = L[i++]){ - nl.push(tmp); + f.push(tmp); next.size += (tmp.is||'').length||1; tmp.page = next; } - //console.timeEnd(); - //console.time(); - p.limbo = p.limbo.slice(0, j); + //console.timeEnd(); console.time(); + p.from = p.from.slice(0, j); p.size -= next.size; - p.sort = 0; b.list.splice(spot(next.first, b.list)+1, 0, next); // TODO: BUG! Make sure next.first is decoded text. // TODO: BUG! spot may need parse too? //console.timeEnd(); if(b.split){ b.split(next, p) } @@ -147,27 +141,29 @@ function tot(){ var tmp = {}; } function sub(i,j){ return (this.first||this.word||B.decode((from(this)||'')[0]||'')).substring(i,j) } function to(){ return this.text = this.text || text(this) } -function join(){ return this.join('|') } -function text(p){ var l = p.limbo; // TODO: BUG??? Shouldn't any stringify cause limbo to be reset? - if(!l){ return (typeof p.from == 'string')? (p.from||'')+'' : '|'+p.from+'|' } - if(!p.from){ return p.limbo = null, '|'+((l && sort(l).join('|'))||'')+'|' } // TODO: p.limbo should be reset each time we "flush". - return '|'+mix(l, from(p), p).join('|')+'|'; // commenting out this sub-portion of code fixed a more basic test, but will probably cause a bug with a FROM + MEMORY. -} -function mix(l, f, p){ // TODO: IMPROVE PERFORMANCE!!!! l[j] = i is 5X+ faster than .push( - var j = 0, i; +function text(p){ // PERF: read->[*] : text->"*" no edit waste 1 time perf. + if(p.limbo){ sort(p) } // TODO: BUG? Empty page meaning? undef, '', '||'? + return ('string' == typeof p.from)? p.from : '|'+(p.from||[]).join('|')+'|'; +} + +function sort(p, l){ + var f = p.from = ('string' == typeof p.from)? slot(p.from) : p.from||[]; + if(!(l = l || p.limbo)){ return f } + return mix(p).sort(function(a,b){ + return (a.word||B.decode(''+a)) < (b.word||B.decode(''+b))? -1:1; + }); +} +function mix(p, l){ // TODO: IMPROVE PERFORMANCE!!!! l[j] = i is 5X+ faster than .push( + l = l || p.limbo || []; p.limbo = null; + var j = 0, i, f = p.from; while(i = l[j++]){ if(got(i.word, p)){ - f[got.i] = i; + f[got.i] = i; // TODO: Trick: allow for a GUN'S HAM CRDT hook here. } else { f.push(i); } } - return sort(f); -} -function sort(l){ //return l.sort(); - return l.sort(function(a,b){ - return (a.word||B.decode(''+a)) < (b.word||B.decode(''+b))? -1:1; - }); + return f; } B.encode = function(d, s, u){ s = s || "|"; u = u || String.fromCharCode(32); diff --git a/test/rad/book.js b/test/rad/book.js index 8d9706964..3205b7a4e 100644 --- a/test/rad/book.js +++ b/test/rad/book.js @@ -374,6 +374,25 @@ var names = ["Adalard","Adora","Aia","Albertina","Alfie","Allyn","Amabil","Ammam }); + describe('API usage checks', function(){ + var opt = {file: 'search'} + var search = RAD(opt); + var b = Book(); + it('read results from in-memory data', async done => { + b('hello', '1data'); + var r = b.page('wat').read(); + expect(r).to.be.eql(['1data']); + b('hello', '1dataZ'); + r = b.page('wat').read(); + expect(r).to.be.eql(['1dataZ']); + b('new', '2data'); + r = b.page('wat').read(); + expect(r).to.be.eql(['1dataZ','2data']); + done(); + }); + + }); + console.log("Performance Tests: 2023 Nov 12, 60M put/sec, 120M get/sec, 1M get/sec with splits."); });