Skip to content

Commit

Permalink
Initial commit, implement and compare Murmur2
Browse files Browse the repository at this point in the history
  • Loading branch information
Robinson7D committed Jul 24, 2016
0 parents commit 9594e1b
Show file tree
Hide file tree
Showing 14 changed files with 357 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .babelrc
@@ -0,0 +1,3 @@
{
"presets": ["es2015"]
}
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
node_modules
dist
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2016 Dylan Robinson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
66 changes: 66 additions & 0 deletions README.md
@@ -0,0 +1,66 @@
# refry-js

Refrying that hash. (Hashing functions reimplemented in Javascript.)

---

## Currently available hashing functions:

- Murmur 2 (+ in a faster descending variant)

## Build (How To, Commands):

Install dependencies: `npm install`

Test speed: `babel-node benchmarks/speed/*.js` (where * is the type of input you're comparing)

Test collisions: `babel-node benchmarks/collisions/*.js` (where * is the type of input you're comparing)

Compile to ES5: `npm run makeES5`

_For words tests to work, one must have /usr/share/dict/words_

---

## Recent test runs:

### Speed

#### Words
>Comparing SPEED, 235887 dictionary words, using seed: 1393
refry Murmur2 port x 34.54 ops/sec ±0.40% (59 runs sampled)
refry Murmur2 port descending-modified x 37.16 ops/sec ±1.27% (63 runs sampled)
internet example x 26.83 ops/sec ±0.27% (47 runs sampled)
Fastest is refry Murmur2 port descending-modified

#### Ascending Integers
>Testing SPEED, 1000000 ascending Integers, using seed: 1393
refry Murmur2 port x 26.96 ops/sec ±2.86% (48 runs sampled)
refry Murmur2 port descending-modified x 33.12 ops/sec ±0.25% (57 runs sampled)
internet example x 14.98 ops/sec ±0.18% (41 runs sampled)
Fastest is refry Murmur2 port descending-modified


### Collisions

NOTICE:
This test is largely dependent on the seed.

However, you'll find that `internet example` and `refry Murmur2 port` always achieve the exact same collision count. In all my tests they hash to the same numbers (though results may differ for characters outside the ASCII range).

Some seeds benefit descending, some benefit ascending; on average they're very close.
This mostly acts as a sanity check 😻

#### Words
>Testing 235887 dictionary words, using seed: 1393
Collisions detected for refry Murmur2 port: 2
Collisions detected for refry Murmur2 port descending-modified: 2
Collisions detected for internet example: 2
Fewest collisions detected on: refry Murmur2 port

#### Ascending Integers
>Testing 1000000 ascending Integers, using seed: 1393
Collisions detected for refry Murmur2 port: 41
Collisions detected for refry Murmur2 port descending-modified: 35
Collisions detected for internet example: 41
Fewest collisions detected on: refry Murmur2 port descending-modified
11 changes: 11 additions & 0 deletions benchmarks/collisions/ascending-ints.js
@@ -0,0 +1,11 @@
import getFunctionsMap from '../helpers/get-murmur-fns-map';
import {consoleCompareCollisions} from '../helpers/count-collisions';

// Seed and sample size come from the `seed` / `test_size` environment
// variables when set; otherwise the defaults below apply.
const SEED = process.env.seed || 1393; // A pretty good seed!
const testSize = process.env.test_size || 1000000; // One million!

// Inputs are the decimal strings "0", "1", ... up to (excluding) testSize.
const NUMBERS = [];
let next = 0;
while (next < testSize) {
  NUMBERS.push(String(next));
  next += 1;
}

console.log(`Testing ${testSize} ascending Integers, using seed: ${SEED}`);

// Hash every input with each function in the map and print collision counts.
consoleCompareCollisions(NUMBERS, getFunctionsMap(SEED));
10 changes: 10 additions & 0 deletions benchmarks/collisions/words.js
@@ -0,0 +1,10 @@
import getFunctionsMap from '../helpers/get-murmur-fns-map';
import {consoleCompareCollisions} from '../helpers/count-collisions';
import getDictionaryWords from '../helpers/get-dictionary-words';

// Seed comes from the `seed` environment variable when set.
const SEED = process.env.seed || 1393; // A pretty good seed!

// Once the word list has been read, report collisions per hashing function.
getDictionaryWords((dictionary) => {
  console.log(`Testing ${dictionary.length} dictionary words, using seed: ${SEED}`);

  const hashers = getFunctionsMap(SEED);
  consoleCompareCollisions(dictionary, hashers);
});
33 changes: 33 additions & 0 deletions benchmarks/helpers/count-collisions.js
@@ -0,0 +1,33 @@
export {
getCollisionsCount as getCollisionsCount,
consoleCompareCollisions as consoleCompareCollisions
};

/**
 * Logs the collision count of every hashing function in the map, then logs
 * the title of the one with the fewest collisions.
 *
 * @param {string[]} words - Inputs handed to each hashing function.
 * @param {Map<string, Function>} hashingFnsToCompareMap - Title -> hashing
 *   function; visited with `forEach`, so entries are reported in map order.
 */
function consoleCompareCollisions(words, hashingFnsToCompareMap) {
  let winner = { title: "", count: Infinity };

  hashingFnsToCompareMap.forEach((hashingFn, title) => {
    const count = getCollisionsCount(words, hashingFn);
    console.log(`Collisions detected for ${title}: ${count}`);

    // Strict comparison: on a tie the earlier entry keeps the lead.
    if (count < winner.count) {
      winner = { title, count };
    }
  });

  console.log(`Fewest collisions detected on: ${winner.title}`);
}

/**
 * Counts how many inputs hash to a value that an earlier input already
 * produced.
 *
 * @param {string[]} words - Inputs to hash, in order.
 * @param {Function} hashingFn - Called with one word; its result is the hash.
 * @returns {number} Number of inputs whose hash had been seen before.
 */
function getCollisionsCount(words, hashingFn) {
  const seenHashes = new Set();

  return words.reduce((collisions, word) => {
    const hash = hashingFn(word);

    if (!seenHashes.has(hash)) {
      seenHashes.add(hash);
      return collisions;
    }
    return collisions + 1;
  }, 0);
}
10 changes: 10 additions & 0 deletions benchmarks/helpers/get-dictionary-words.js
@@ -0,0 +1,10 @@
var fs = require('fs'); // Load fs module for node.
export default getDictionaryWords;

/**
 * Reads `/usr/share/dict/words` as UTF-8 and passes its words to `fn`.
 *
 * The file is split on line breaks (LF or CRLF) and empty lines are dropped,
 * so the trailing newline at the end of the file does not produce an empty
 * string as a bogus last "word".
 *
 * @param {function(string[]): void} fn - Receives the array of words, one per
 *   non-empty line of the file.
 * @throws Rethrows the read error from inside the `fs.readFile` callback.
 */
function getDictionaryWords(fn){ // TODO: Make Promise?
  fs.readFile('/usr/share/dict/words', "utf8", function(err, data) {
    if (err) { throw err; }

    const words = data.split(/\r?\n/).filter((line) => line.length > 0);
    fn(words);
  });
}
13 changes: 13 additions & 0 deletions benchmarks/helpers/get-murmur-fns-map.js
@@ -0,0 +1,13 @@
import murmur2_32 from '../../src/murmur2';
import decr_murmur2_32 from '../../src/decrementing-murmur2';
var murmurhash2_32_gc = require("murmurhash-js").murmur2;

export default getSeededMurmurMap;

/**
 * Builds the set of hashing functions the benchmarks compare, each one bound
 * to the given seed so it can be called with just a word.
 *
 * @param {number|string} SEED - Seed forwarded to every hashing function.
 * @returns {Map<string, Function>} Display title -> `(word) => hash`, in the
 *   order: refry port, descending-modified refry port, murmurhash-js.
 */
function getSeededMurmurMap(SEED) {
  const hashers = new Map();

  hashers.set('refry Murmur2 port', (word) => murmur2_32(SEED, word));
  hashers.set('refry Murmur2 port descending-modified', (word) => decr_murmur2_32(SEED, word));
  // murmurhash-js takes its arguments the other way round: (word, seed).
  hashers.set('internet example', (word) => murmurhash2_32_gc(word, SEED));

  return hashers;
}
32 changes: 32 additions & 0 deletions benchmarks/speed/ascending-ints.js
@@ -0,0 +1,32 @@
import Benchmark from '../../node_modules/benchmark/benchmark';
import getFunctionsMap from '../helpers/get-murmur-fns-map';
import {consoleCompareCollisions} from '../helpers/count-collisions';

// Seed and sample size come from the `seed` / `test_size` environment
// variables when set; otherwise the defaults below apply.
const SEED = process.env.seed || 1393; // A pretty good seed!
const testSize = process.env.test_size || 1000000; // One million!

// Inputs are the decimal strings "0", "1", ... up to (excluding) testSize.
const NUMBERS = [];
let next = 0;
while (next < testSize) {
  NUMBERS.push(String(next));
  next += 1;
}

console.log(`Testing SPEED, ${testSize} ascending Integers, using seed: ${SEED}`);

const benchmark = new Benchmark.Suite();

// One benchmark case per hashing function; a single run hashes every input.
getFunctionsMap(SEED).forEach((fn, title) => {
  benchmark.add(title, function () {
    NUMBERS.forEach(fn);
  });
});

benchmark
  .on('cycle', (event) => console.log(String(event.target)))
  .on('complete', function () {
    // Must stay a regular function: the handler reads its result set from `this`.
    console.log('Fastest is ' + this.filter('fastest').map('name'));
  })
  .run();

/*
* RECENT RUN:
*
* Testing SPEED, 1000000 ascending Integers, using seed: 1393
* refry Murmur2 port x 26.68 ops/sec ±4.10% (48 runs sampled)
* refry Murmur2 port descending-modified x 32.12 ops/sec ±0.42% (55 runs sampled)
* internet example x 14.72 ops/sec ±0.35% (40 runs sampled)
* Fastest is refry Murmur2 port descending-modified
*/
20 changes: 20 additions & 0 deletions benchmarks/speed/words.js
@@ -0,0 +1,20 @@
import Benchmark from '../../node_modules/benchmark/benchmark';
import getFunctionsMap from '../helpers/get-murmur-fns-map';
import getDictionaryWords from '../helpers/get-dictionary-words';

// Seed comes from the `seed` environment variable when set.
const SEED = process.env.seed || 1393; // A pretty good seed!

// Once the word list has been read, time each hashing function over all of it.
getDictionaryWords((dictionary) => {
  console.log(`Comparing SPEED, ${dictionary.length} dictionary words, using seed: ${SEED}`);

  const benchmark = new Benchmark.Suite();

  // One benchmark case per hashing function; a single run hashes every word.
  getFunctionsMap(SEED).forEach((fn, title) => {
    benchmark.add(title, function () {
      dictionary.forEach(fn);
    });
  });

  benchmark
    .on('cycle', (event) => console.log(String(event.target)))
    .on('complete', function () {
      // Must stay a regular function: the handler reads its result set from `this`.
      console.log('Fastest is ' + this.filter('fastest').map('name'));
    })
    .run();
});
23 changes: 23 additions & 0 deletions package.json
@@ -0,0 +1,23 @@
{
"name": "structured-js",
"private": false,
"version": "0.0.1",
"description": "Hashing functions reimplemented in Javascript",
"repository": "http://dylancodes.net",
"license": "MIT",
"dependencies": {
"babel-polyfill": "^6.7.2"
},
"devDependencies": {
"babel-cli": "^6.2.0",
"babel-core": "^6.0.20",
"babel-preset-es2015": "^6.9.0",
"benchmark": "^2.1.0",

"murmurhash-js": "*"
},
"scripts": {
"test": "mocha",
"makeES5": "babel src --presets babel-preset-es2015 --out-dir dist/es5"
}
}
56 changes: 56 additions & 0 deletions src/decrementing-murmur2.js
@@ -0,0 +1,56 @@
const M = 0x5bd1e995;
const IMUL = Math.imul || imulPolyfill;

export default decrMurmur2_32;

/**
 * 32-bit Murmur2-style hash that consumes the string from its last character
 * toward its first.
 *
 * Based on aappleby's original implementation
 * (Available at: https://github.com/aappleby/smhasher).
 *
 * Modified lightly as Javascript does not have a nice way to point to arbitrary memory:
 * every UTF-16 code unit is treated as one byte. As such, Extended ASCII (0-255) only!
 * Larger code units overlap their neighbours when packed into a 32-bit word.
 *
 * Additionally modified to work backward, because this version happens to be faster in JS
 * than the original port. The seed is also mixed with `length - 1` rather than
 * `length`, so results intentionally differ from the forward port.
 *
 * @param {number} seed - Hash seed; the XOR below coerces it to a 32-bit integer.
 * @param {string} str - Text to hash.
 * @returns {number} Unsigned 32-bit hash (0 .. 2^32 - 1).
 */
function decrMurmur2_32(seed, str) {
  let position = str.length - 1;
  let h = seed ^ position; // Off by one from original spec (but should be fine?)
  let curValue = 0;

  // Mix four characters at a time, walking backward from the end of the string.
  while (position >= 3) {
    // Operands are evaluated left to right, so each `position--` picks the
    // next character toward the start.
    curValue = str.charCodeAt(position--)
      | (str.charCodeAt(position--) << 8)
      | (str.charCodeAt(position--) << 16)
      | (str.charCodeAt(position--) << 24);

    curValue = IMUL(curValue, M);
    curValue ^= curValue >>> 24;

    h = IMUL(h, M) ^ IMUL(curValue, M); // Hash curValue back into h
  }

  // 0-3 characters remain at the front of the string (position is 2, 1, 0 or -1).
  // The cases fall through on purpose so every remaining character is mixed in.
  switch (position) {
    case 2: h ^= str.charCodeAt(position--) << 16; // falls through
    case 1: h ^= str.charCodeAt(position--) << 8; // falls through
    case 0: h ^= str.charCodeAt(position);
      h = IMUL(h, M);
  }

  // Final avalanche.
  h ^= (h >>> 13);
  h = IMUL(h, M);
  h ^= (h >>> 15);

  // Bitwise operators yield signed 32-bit values; normalise to unsigned so the
  // hash is never negative.
  return h >>> 0;
}

// From https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul
/**
 * Fallback for `Math.imul`: multiplies two numbers as 32-bit integers and
 * returns the low 32 bits of the product as a signed 32-bit integer.
 *
 * @param {number} a - First factor.
 * @param {number} b - Second factor.
 * @returns {number} Signed 32-bit product.
 */
function imulPolyfill(a, b) {
  // Split both factors into 16-bit halves so no partial product loses precision.
  const aHigh = (a >>> 16) & 0xffff;
  const aLow = a & 0xffff;
  const bHigh = (b >>> 16) & 0xffff;
  const bLow = b & 0xffff;

  const lowProduct = aLow * bLow;
  // the shift by 0 fixes the sign on the high part
  const crossTerms = ((aHigh * bLow + aLow * bHigh) << 16) >>> 0;

  // the final |0 converts the unsigned value into a signed value
  return (lowProduct + crossTerms) | 0;
}
57 changes: 57 additions & 0 deletions src/murmur2.js
@@ -0,0 +1,57 @@
const M = 0x5bd1e995;
const IMUL = Math.imul || imulPolyfill;

export default murmur2_32;

/**
 * 32-bit Murmur2 hash of a string, read front to back.
 *
 * Based on aappleby's original implementation
 * (Available at: https://github.com/aappleby/smhasher).
 *
 * Modified lightly as Javascript does not have a nice way to point to arbitrary memory:
 * every UTF-16 code unit is treated as one byte. As such, Extended ASCII (0-255) only!
 * Larger code units overlap their neighbours when packed into a 32-bit word.
 *
 * @param {number} seed - Hash seed; the XOR below coerces it to a 32-bit integer.
 * @param {string} str - Text to hash.
 * @returns {number} Unsigned 32-bit hash (0 .. 2^32 - 1).
 */
function murmur2_32(seed, str) {
  const length = str.length;
  const tailLength = length % 4; // characters left over after the 4-char blocks
  const tailStart = length - tailLength;

  let h = seed ^ length;

  // Mix the body four characters at a time, moving forward through the string.
  for (let offset = 0; offset < tailStart; offset += 4) {
    let block = str.charCodeAt(offset)
      | (str.charCodeAt(offset + 1) << 8)
      | (str.charCodeAt(offset + 2) << 16)
      | (str.charCodeAt(offset + 3) << 24);

    block = IMUL(block, M);
    block ^= block >>> 24;

    h = IMUL(h, M) ^ IMUL(block, M); // Hash the block back into h
  }

  // Fold in the 1-3 trailing characters, highest position first.
  if (tailLength > 0) {
    if (tailLength === 3) {
      h ^= str.charCodeAt(tailStart + 2) << 16;
    }
    if (tailLength >= 2) {
      h ^= str.charCodeAt(tailStart + 1) << 8;
    }
    h ^= str.charCodeAt(tailStart);
    h = IMUL(h, M);
  }

  // Final avalanche.
  h ^= h >>> 13;
  h = IMUL(h, M);
  h ^= h >>> 15;

  // Normalise the signed bitwise result to an unsigned 32-bit value.
  return h >>> 0;
}

// From https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul
/**
 * Fallback for `Math.imul`: 32-bit integer multiplication that keeps only the
 * low 32 bits of the product, returned as a signed 32-bit integer.
 *
 * @param {number} a - First factor.
 * @param {number} b - Second factor.
 * @returns {number} Signed 32-bit product.
 */
function imulPolyfill(a, b) {
  // 16-bit halves of each factor; their partial products stay exact in a double.
  const upperA = (a >>> 16) & 0xffff;
  const lowerA = a & 0xffff;
  const upperB = (b >>> 16) & 0xffff;
  const lowerB = b & 0xffff;

  // the shift by 0 fixes the sign on the high part
  const shiftedCross = ((upperA * lowerB + lowerA * upperB) << 16) >>> 0;
  const total = lowerA * lowerB + shiftedCross;

  // the final |0 converts the unsigned value into a signed value
  return total | 0;
}

0 comments on commit 9594e1b

Please sign in to comment.