Skip to content
Browse files

Added createHashes() to be able to reuse the results from the digest() function.

Added add(byte[]) and contains(byte[]) as faster alternatives to add() and contains()
  • Loading branch information...
1 parent 3438c83 commit 4da4b1e0da7dd4d2bc744295e335d5eabbb352d6 @MagnusS committed Apr 13, 2011
Showing with 99 additions and 44 deletions.
  1. +72 −36 src/com/skjegstad/utils/BloomFilter.java
  2. +27 −8 test/com/skjegstad/utils/BloomFilterTest.java
View
108 src/com/skjegstad/utils/BloomFilter.java
@@ -122,45 +122,65 @@ public BloomFilter(int bitSetSize, int expectedNumberOfFilterElements, int actua
*
* @param val specifies the input data.
* @param charset specifies the encoding of the input data.
- * @param salt to use for the digest function
* @return digest as int.
*/
- public static int createHash(String val, Charset charset, byte salt) {
- return createHash(val.getBytes(charset), salt);
+ public static int createHash(String val, Charset charset) {
+ return createHash(val.getBytes(charset));
}
/**
* Generates a digest based on the contents of a String.
*
* @param val specifies the input data. The encoding is expected to be UTF-8.
- * @param salt to use for the digest function
* @return digest as int.
*/
- public static int createHash(String val, byte salt) {
- return createHash(val, charset, salt);
+ public static int createHash(String val) {
+ return createHash(val, charset);
}
/**
* Generates a digest based on the contents of an array of bytes.
*
* @param data specifies input data.
- * @param salt to use for the digest function
* @return digest as int.
*/
- public static int createHash(byte[] data, byte salt) {
- int h = 0;
- byte[] res;
-
- synchronized (digestFunction) {
- digestFunction.update(salt);
- res = digestFunction.digest(data);
- }
+ public static int createHash(byte[] data) {
+ return createHashes(data, 1)[0];
+ }
- for (int i = 0; i < 4; i++) {
- h <<= 8;
- h |= ((int) res[i]) & 0xFF;
+ /**
+ * Generates digests based on the contents of an array of bytes, splits the result into 4-byte ints and stores them in an array. The
+ * digest function is called until the required number of ints is produced. For each call to digest a salt
+ * is prepended to the data. The salt is increased by 1 for each call.
+ *
+ * @param data specifies input data.
+ * @param hashes number of hashes/ints to produce.
+ * @return array of int-sized hashes
+ */
+ public static int[] createHashes(byte[] data, int hashes) {
+ int[] result = new int[hashes];
+
+ int k = 0;
+ byte salt = 0;
+ while (k < hashes) {
+ byte[] digest;
+ synchronized (digestFunction) {
+ digestFunction.update(salt);
+ salt++;
+ digest = digestFunction.digest(data);
+ }
+
+ for (int i = 0; i < digest.length/4 && k < hashes; i++) {
+ int h = 0;
+ for (int j = (i*4); j < (i*4)+4; j++) {
+ h <<= 8;
+ h |= ((int) digest[j]) & 0xFF;
+ }
+ result[k] = h;
+ k++;
+ }
}
- return h;
+ return result;
}
/**
@@ -275,13 +295,19 @@ public void clear() {
* @param element is an element to register in the Bloom filter.
*/
public void add(E element) {
- long hash;
- byte[] data = element.toString().getBytes(charset);
- for (int x = 0; x < k; x++) {
- hash = createHash(data, (byte)x);
- hash = hash % (long)bitSetSize;
- bitset.set(Math.abs((int)hash), true);
- }
+ add(element.toString().getBytes(charset));
+ numberOfAddedElements ++;
+ }
+
+ /**
+ * Adds an array of bytes to the Bloom filter.
+ *
+ * @param bytes array of bytes to add to the Bloom filter.
+ */
+ public void add(byte[] bytes) {
+ int[] hashes = createHashes(bytes, k);
+ for (int hash : hashes)
+ bitset.set(Math.abs(hash % bitSetSize), true);
numberOfAddedElements ++;
}
@@ -293,7 +319,7 @@ public void addAll(Collection<? extends E> c) {
for (E element : c)
add(element);
}
-
+
/**
* Returns true if the element could have been inserted into the Bloom filter.
* Use getFalsePositiveProbability() to calculate the probability of this
@@ -303,15 +329,25 @@ public void addAll(Collection<? extends E> c) {
* @return true if the element could have been inserted into the Bloom filter.
*/
public boolean contains(E element) {
- long hash;
- byte[] data = element.toString().getBytes(charset);
- for (int x = 0; x < k; x++) {
- hash = createHash(data, (byte)x);
- hash = hash % (long)bitSetSize;
- if (!bitset.get(Math.abs((int)hash)))
- return false;
- }
- return true;
+ return contains(element.toString().getBytes(charset));
+ }
+
+ /**
+ * Returns true if the array of bytes could have been inserted into the Bloom filter.
+ * Use getFalsePositiveProbability() to calculate the probability of this
+ * being correct.
+ *
+ * @param bytes array of bytes to check.
+ * @return true if the array could have been inserted into the Bloom filter.
+ */
+ public boolean contains(byte[] bytes) {
+ int[] hashes = createHashes(bytes, k);
+ for (int hash : hashes) {
+ if (!bitset.get(Math.abs(hash % bitSetSize))) {
+ return false;
+ }
+ }
+ return true;
}
/**
View
35 test/com/skjegstad/utils/BloomFilterTest.java
@@ -56,14 +56,13 @@ public void testConstructorCNK() throws Exception {
public void testCreateHash_String() throws Exception {
System.out.println("createHash");
String val = UUID.randomUUID().toString();
- byte salt = 11;
- int result1 = BloomFilter.createHash(val, salt);
- int result2 = BloomFilter.createHash(val, salt);
+ int result1 = BloomFilter.createHash(val);
+ int result2 = BloomFilter.createHash(val);
assertEquals(result2, result1);
- long result3 = BloomFilter.createHash(UUID.randomUUID().toString(),salt);
+ int result3 = BloomFilter.createHash(UUID.randomUUID().toString());
assertNotSame(result3, result2);
- long result4 = BloomFilter.createHash(val.getBytes("UTF-8"), salt);
+ int result4 = BloomFilter.createHash(val.getBytes("UTF-8"));
assertEquals(result4, result1);
}
@@ -76,13 +75,33 @@ public void testCreateHash_byteArr() throws UnsupportedEncodingException {
System.out.println("createHash");
String val = UUID.randomUUID().toString();
byte[] data = val.getBytes("UTF-8");
- byte salt = 11;
- long result1 = BloomFilter.createHash(data, salt);
- long result2 = BloomFilter.createHash(val, salt);
+ int result1 = BloomFilter.createHash(data);
+ int result2 = BloomFilter.createHash(val);
assertEquals(result1, result2);
}
/**
+ * Test of createHashes method, of class BloomFilter.
+ * @throws UnsupportedEncodingException
+ */
+ @Test
+ public void testCreateHashes_byteArr() throws UnsupportedEncodingException {
+ System.out.println("createHashes");
+ String val = UUID.randomUUID().toString();
+ byte[] data = val.getBytes("UTF-8");
+ int[] result1 = BloomFilter.createHashes(data, 10);
+ int[] result2 = BloomFilter.createHashes(data, 10);
+ assertEquals(result1.length, 10);
+ assertEquals(result2.length, 10);
+ assertArrayEquals(result1, result2);
+ int[] result3 = BloomFilter.createHashes(data, 5);
+ assertEquals(result3.length, 5);
+ for (int i = 0; i < result3.length; i++)
+ assertEquals(result3[i], result1[i]);
+
+ }
+
+ /**
* Test of equals method, of class BloomFilter.
* @throws UnsupportedEncodingException
*/

0 comments on commit 4da4b1e

Please sign in to comment.
Something went wrong with that request. Please try again.