Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit, implement and compare Murmur2
- Loading branch information
0 parents
commit 9594e1b
Showing
14 changed files
with
357 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"presets": ["es2015"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
node_modules | ||
dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
The MIT License (MIT) | ||
|
||
Copyright (c) 2016 Dylan Robinson | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# refry-js | ||
|
||
Refrying that hash. (Hashing functions reimplemented in Javascript.) | ||
|
||
--- | ||
|
||
## Currently available hashing functions: | ||
|
||
- Murmur 2 (+ in a faster descending variant) | ||
|
||
## Build (How To, Commands): | ||
|
||
Install dependencies: `npm install` | ||
|
||
Test speed: `babel-node benchmarks/speed/*.js` (where * is the type of input you're comparing) | ||
|
||
Test collisions: `babel-node benchmarks/collisions/*.js` (where * is the type of input you're comparing) | ||
|
||
Compile to ES5: `npm run makeES5` | ||
|
||
_For words tests to work, one must have /usr/share/dict/words_ | ||
|
||
--- | ||
|
||
## Recent test runs: | ||
|
||
### Speed | ||
|
||
#### Words | ||
>Comparing SPEED, 235887 dictionary words, using seed: 1393 | ||
refry Murmur2 port x 34.54 ops/sec ±0.40% (59 runs sampled) | ||
refry Murmur2 port descending-modified x 37.16 ops/sec ±1.27% (63 runs sampled) | ||
internet example x 26.83 ops/sec ±0.27% (47 runs sampled) | ||
Fastest is refry Murmur2 port descending-modified | ||
|
||
#### Ascending Integers | ||
>Testing SPEED, 1000000 ascending Integers, using seed: 1393 | ||
refry Murmur2 port x 26.96 ops/sec ±2.86% (48 runs sampled) | ||
refry Murmur2 port descending-modified x 33.12 ops/sec ±0.25% (57 runs sampled) | ||
internet example x 14.98 ops/sec ±0.18% (41 runs sampled) | ||
Fastest is refry Murmur2 port descending-modified | ||
|
||
|
||
### Collisions | ||
|
||
NOTICE: | ||
This test is largely dependent on the seed. | ||
|
||
However, you'll find that `internet example` and `refry Murmur2 port` always achieve the exact same collision count. In all my tests they hash to the same numbers (this may differ out of ASCII range?) | ||
|
||
Some seeds benefit descending, some benefit ascending; on average they're very close. | ||
This mostly acts as a sanity check 😻 | ||
|
||
#### Words | ||
>Testing 235887 dictionary words, using seed: 1393 | ||
Collisions detected for refry Murmur2 port: 2 | ||
Collisions detected for refry Murmur2 port descending-modified: 2 | ||
Collisions detected for internet example: 2 | ||
Fewest collisions detected on: refry Murmur2 port | ||
|
||
#### Ascending Integers | ||
>Testing 1000000 ascending Integers, using seed: 1393 | ||
Collisions detected for refry Murmur2 port: 41 | ||
Collisions detected for refry Murmur2 port descending-modified: 35 | ||
Collisions detected for internet example: 41 | ||
Fewest collisions detected on: refry Murmur2 port descending-modified |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import getFunctionsMap from '../helpers/get-murmur-fns-map'; | ||
import {consoleCompareCollisions} from '../helpers/count-collisions'; | ||
|
||
// Collision benchmark over ascending integers rendered as decimal strings.
// Both settings may be overridden through the environment (`seed`, `test_size`);
// environment values arrive as strings and are coerced where they are used.
const SEED = process.env.seed || 1393;
const testSize = process.env.test_size || 1000000;

// Inputs are "0", "1", ... up to (but not including) testSize.
const NUMBERS = [];
let nextNumber = 0;
while (nextNumber < testSize) {
  NUMBERS.push("" + nextNumber);
  nextNumber += 1;
}

console.log(`Testing ${testSize} ascending Integers, using seed: ${SEED}`);

const hashingFns = getFunctionsMap(SEED);
consoleCompareCollisions(NUMBERS, hashingFns);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import getFunctionsMap from '../helpers/get-murmur-fns-map'; | ||
import {consoleCompareCollisions} from '../helpers/count-collisions'; | ||
import getDictionaryWords from '../helpers/get-dictionary-words'; | ||
|
||
// Collision benchmark over the system dictionary.
// The seed may be overridden through the `seed` environment variable.
const SEED = process.env.seed || 1393;

// The word list is loaded asynchronously; the comparison runs once it arrives.
getDictionaryWords((words) => {
  console.log(`Testing ${words.length} dictionary words, using seed: ${SEED}`);

  const hashingFns = getFunctionsMap(SEED);
  consoleCompareCollisions(words, hashingFns);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
export { | ||
getCollisionsCount as getCollisionsCount, | ||
consoleCompareCollisions as consoleCompareCollisions | ||
}; | ||
|
||
/*
 * Logs the collision count of every hashing function in `hashingFnsToCompareMap`
 * (iterated with forEach as value = hashing function, key = title) when run
 * over `words`, then logs the title of the function with the fewest collisions.
 *
 * Ties go to the entry that was visited first, because a later entry only
 * takes over when its count is strictly lower.
 */
function consoleCompareCollisions(words, hashingFnsToCompareMap) {
  const best = { title: "", collisions: Infinity };

  hashingFnsToCompareMap.forEach((hashingFn, title) => {
    const collisions = getCollisionsCount(words, hashingFn);
    console.log(`Collisions detected for ${title}: ${collisions}`);

    if (collisions < best.collisions) {
      best.collisions = collisions;
      best.title = title;
    }
  });

  console.log(`Fewest collisions detected on: ${best.title}`);
}
|
||
/*
 * Counts how many entries of `words` hash to a value that an earlier entry
 * already produced. `hashingFn` is called with the word as its only argument.
 */
function getCollisionsCount(words, hashingFn) {
  const seenHashes = new Set();
  let collisions = 0;

  words.forEach((word) => {
    const sizeBefore = seenHashes.size;
    seenHashes.add(hashingFn(word));

    // The set only grows for a hash it has not seen before.
    if (seenHashes.size === sizeBefore) {
      collisions += 1;
    }
  });

  return collisions;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
var fs = require('fs'); // Load fs module for node. | ||
export default getDictionaryWords; | ||
|
||
/*
 * Reads /usr/share/dict/words as UTF-8 and passes its lines to `fn` as an
 * array of strings. A read error is thrown from inside the readFile callback.
 *
 * Empty lines are dropped: a file that ends with a newline would otherwise
 * contribute a trailing "" that gets hashed and counted as if it were a word.
 */
function getDictionaryWords(fn){ // TODO: Make Promise?
  fs.readFile('/usr/share/dict/words', "utf8", function(err, data) {
    if (err) { throw err; }

    fn(data.split('\n').filter((word) => word.length > 0));
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import murmur2_32 from '../../src/murmur2'; | ||
import decr_murmur2_32 from '../../src/decrementing-murmur2'; | ||
var murmurhash2_32_gc = require("murmurhash-js").murmur2; | ||
|
||
export default getSeededMurmurMap; | ||
|
||
/*
 * Builds a Map from a display title to a one-argument hashing function,
 * with `SEED` already bound into each of the three Murmur2 implementations
 * under comparison. Insertion order is the order in which they are reported.
 */
function getSeededMurmurMap(SEED){
  const seededFns = new Map();

  seededFns.set('refry Murmur2 port', (word) => murmur2_32(SEED, word));
  seededFns.set('refry Murmur2 port descending-modified', (word) => decr_murmur2_32(SEED, word));
  // The third-party implementation takes its arguments the other way round.
  seededFns.set('internet example', (word) => murmurhash2_32_gc(word, SEED));

  return seededFns;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import Benchmark from '../../node_modules/benchmark/benchmark'; | ||
import getFunctionsMap from '../helpers/get-murmur-fns-map'; | ||
import {consoleCompareCollisions} from '../helpers/count-collisions'; | ||
|
||
// Speed benchmark over ascending integers rendered as decimal strings.
// Both settings may be overridden through the environment (`seed`, `test_size`);
// environment values arrive as strings and are coerced where they are used.
const SEED = process.env.seed || 1393;
const testSize = process.env.test_size || 1000000;

// Inputs are "0", "1", ... up to (but not including) testSize.
const NUMBERS = [];
let nextNumber = 0;
while (nextNumber < testSize) {
  NUMBERS.push("" + nextNumber);
  nextNumber += 1;
}

console.log(`Testing SPEED, ${testSize} ascending Integers, using seed: ${SEED}`);

const suite = new Benchmark.Suite();

// One benchmark case per hashing function; a single "op" hashes every input once.
getFunctionsMap(SEED).forEach((fn, title) => {
  suite.add(title, () => { NUMBERS.forEach(fn); });
});

suite.on('cycle', (event) => {
  console.log(String(event.target));
});

// A regular function is required here: the handler reads the suite through `this`.
suite.on('complete', function () {
  console.log('Fastest is ' + this.filter('fastest').map('name'));
});

suite.run();
|
||
/* | ||
* RECENT RUN: | ||
* | ||
* Testing SPEED, 1000000 ascending Integers, using seed: 1393 | ||
* refry Murmur2 port x 26.68 ops/sec ±4.10% (48 runs sampled) | ||
* refry Murmur2 port descending-modified x 32.12 ops/sec ±0.42% (55 runs sampled) | ||
* internet example x 14.72 ops/sec ±0.35% (40 runs sampled) | ||
* Fastest is refry Murmur2 port descending-modified | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import Benchmark from '../../node_modules/benchmark/benchmark'; | ||
import getFunctionsMap from '../helpers/get-murmur-fns-map'; | ||
import getDictionaryWords from '../helpers/get-dictionary-words'; | ||
|
||
// Speed benchmark over the system dictionary.
// The seed may be overridden through the `seed` environment variable.
const SEED = process.env.seed || 1393;

// The word list is loaded asynchronously; the suite is built and run once it arrives.
getDictionaryWords((words) => {
  console.log(`Comparing SPEED, ${words.length} dictionary words, using seed: ${SEED}`);

  const suite = new Benchmark.Suite();

  // One benchmark case per hashing function; a single "op" hashes every word once.
  getFunctionsMap(SEED).forEach((fn, title) => {
    suite.add(title, () => { words.forEach(fn); });
  });

  suite.on('cycle', (event) => {
    console.log(String(event.target));
  });

  // A regular function is required here: the handler reads the suite through `this`.
  suite.on('complete', function () {
    console.log('Fastest is ' + this.filter('fastest').map('name'));
  });

  suite.run();
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"name": "structured-js", | ||
"private": false, | ||
"version": "0.0.1", | ||
"description": "Common data structures implemented in Javascript", | ||
"repository": "http://dylancodes.net", | ||
"license": "MIT", | ||
"dependencies": { | ||
"babel-polyfill": "^6.7.2" | ||
}, | ||
"devDependencies": { | ||
"babel-cli": "^6.2.0", | ||
"babel-core": "^6.0.20", | ||
"babel-preset-es2015": "^6.9.0", | ||
"benchmark": "^2.1.0", | ||
|
||
"murmurhash-js": "*" | ||
}, | ||
"scripts": { | ||
"test": "mocha", | ||
"makeES5": "babel src --presets babel-preset-es2015 --out-dir dist/es5" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
const M = 0x5bd1e995; | ||
const IMUL = Math.imul || imulPolyfill; | ||
|
||
export default decrMurmur2_32; | ||
|
||
/*
 * Based on aappleby's original implementation
 * (Available at: https://github.com/aappleby/smhasher).
 *
 * Modified lightly as Javascript does not have a nice way to point to arbitrary memory.
 * As such, Extended ASCII (0-255) only! Char codes are OR-ed/XOR-ed in unmasked,
 * so anything above 255 bleeds into the neighbouring byte positions.
 *
 * Additionally modified to work backward (from the last character to the first),
 * because this version happens to be faster in JS than the original port.
 * As a result it does NOT produce the same values as the forward Murmur2.
 *
 * seed: 32-bit integer seed.
 * str:  string to hash.
 * Returns the hash as an unsigned 32-bit integer (0 .. 0xFFFFFFFF), the same
 * range murmur2_32 reports, instead of leaking the signed result of the
 * bitwise operators.
 */
function decrMurmur2_32(seed, str) {
  let position = str.length - 1;
  let h = seed ^ position; // Mixes in length - 1 rather than length (differs from the original spec)
  let curValue = 0;

  // Consume four characters per round, walking from the end of the string.
  while (position >= 3) {
    curValue = str.charCodeAt(position--)
      | (str.charCodeAt(position--) << 8)
      | (str.charCodeAt(position--) << 16)
      | (str.charCodeAt(position--) << 24);

    curValue = IMUL(curValue, M);
    curValue ^= curValue >>> 24;

    h = IMUL(h, M) ^ IMUL(curValue, M); // Hash curValue back into h
  }

  // Up to three leading characters are left; each case intentionally falls through.
  switch (position) {
    case 2: h ^= str.charCodeAt(position--) << 16; // falls through
    case 1: h ^= str.charCodeAt(position--) << 8;  // falls through
    case 0: h ^= str.charCodeAt(position);
            h = IMUL(h, M);
  }

  // Final avalanche.
  h ^= (h >>> 13);
  h = IMUL(h, M);
  h ^= (h >>> 15);

  return h >>> 0; // Reinterpret the int32 as unsigned
}
|
||
// From https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul
// 32-bit integer multiplication for engines without Math.imul: returns the low
// 32 bits of a * b as a signed integer.
function imulPolyfill(a, b) {
  const aHigh = (a >>> 16) & 0xffff;
  const aLow = a & 0xffff;
  const bHigh = (b >>> 16) & 0xffff;
  const bLow = b & 0xffff;

  // Cross terms land in the upper half; `>>> 0` makes the shifted value unsigned.
  // (aHigh * bHigh would sit entirely above bit 31, so it is never computed.)
  const crossTerms = ((aHigh * bLow + aLow * bHigh) << 16) >>> 0;

  // `| 0` wraps the sum back into a signed 32-bit integer.
  return (aLow * bLow + crossTerms) | 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
const M = 0x5bd1e995; | ||
const IMUL = Math.imul || imulPolyfill; | ||
|
||
export default murmur2_32; | ||
|
||
/*
 * Based on aappleby's original implementation
 * (Available at: https://github.com/aappleby/smhasher).
 *
 * Modified lightly as Javascript does not have a nice way to point to arbitrary memory.
 * As such, Extended ASCII (0-255) only! Char codes are OR-ed/XOR-ed in unmasked,
 * so anything above 255 bleeds into the neighbouring byte positions.
 *
 * seed: 32-bit integer seed.
 * str:  string to hash.
 * Returns the hash as an unsigned 32-bit integer.
 */
function murmur2_32(seed, str) {
  const length = str.length;
  // Index where the last, incomplete group of 1-3 characters (if any) begins.
  const tailStart = length - (length % 4);
  let h = seed ^ length;

  // Mix the string into the hash four characters at a time, front to back.
  for (let offset = 0; offset < tailStart; offset += 4) {
    let chunk = str.charCodeAt(offset)
      | (str.charCodeAt(offset + 1) << 8)
      | (str.charCodeAt(offset + 2) << 16)
      | (str.charCodeAt(offset + 3) << 24);

    chunk = IMUL(chunk, M);
    chunk ^= chunk >>> 24;

    h = IMUL(h, M) ^ IMUL(chunk, M);
  }

  // Fold in the remaining 1-3 characters, highest position first.
  const remaining = length - tailStart;
  if (remaining === 3) {
    h ^= str.charCodeAt(tailStart + 2) << 16;
  }
  if (remaining >= 2) {
    h ^= str.charCodeAt(tailStart + 1) << 8;
  }
  if (remaining >= 1) {
    h ^= str.charCodeAt(tailStart);
    h = IMUL(h, M);
  }

  // Final avalanche.
  h ^= (h >>> 13);
  h = IMUL(h, M);
  h ^= (h >>> 15);

  return h >>> 0; // Reinterpret the int32 as unsigned
}
|
||
// From https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul
// Fallback for Math.imul: multiplies two numbers as 32-bit integers and
// returns the low 32 bits of the product as a signed integer.
function imulPolyfill(a, b) {
  // Split both operands into unsigned 16-bit halves.
  const hiA = (a >>> 16) & 0xffff;
  const loA = a & 0xffff;
  const hiB = (b >>> 16) & 0xffff;
  const loB = b & 0xffff;

  const lowProduct = loA * loB;
  // Shifting keeps only the part of the mixed products that fits below bit 32;
  // `>>> 0` then reads that part as an unsigned value.
  const mixedProducts = ((hiA * loB + loA * hiB) << 16) >>> 0;

  // `| 0` converts the unsigned sum into a signed 32-bit value.
  return (lowProduct + mixedProducts) | 0;
}