Skip to content

Commit

Permalink
Moved to CityHash (proven to be faster); Made Hashing and BytesIntero…
Browse files Browse the repository at this point in the history
…p API more coherent; removed Void marshalling because values should never be null
  • Loading branch information
leventov committed Jan 26, 2015
1 parent 935c242 commit 6869783
Show file tree
Hide file tree
Showing 51 changed files with 1,619 additions and 546 deletions.
151 changes: 142 additions & 9 deletions src/main/java/net/openhft/chronicle/hash/hashing/Access.java
Expand Up @@ -16,8 +16,13 @@

package net.openhft.chronicle.hash.hashing;

import net.openhft.lang.io.Bytes;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import static java.nio.ByteOrder.LITTLE_ENDIAN;

/**
* Strategy of reading bytes, defines the abstraction of {@code T} class instances as ordered byte
* sequence. All {@code getXXX(input, offset)} should be consistent to each other in terms of
Expand All @@ -38,15 +43,119 @@
* range is outside of the bounds of the byte sequence, represented by the given {@code input}.
* However, they could omit checks for better performance.
*
* <p>Only {@link #getByte(Object, long)} and {@link #byteOrder(Object)} methods are abstract in
* this class, so implementing them is sufficient for valid {@code Access} instance, but for
* efficiency you should override methods used by target {@link LongHashFunction} implementation.
*
* <p>{@code Access} API is designed for inputs that actually represent byte sequences that lie
* contiguously in memory. Theoretically {@code Access} strategy could be implemented for
* non-contiguous byte sequences, or abstractions which aren't actually present in memory as they
* are accessed, but this should be awkward, and hashing using such {@code Access} is expected to
* be slow.
*
* @param <T> the type of the object to access
* @see LongHashFunction#hash(Object, Access, long, long)
*/
public interface Access<T> {
public abstract class Access<T> {

/**
* Returns the {@code Access} to any {@link Bytes}. This {@code Access} isn't useful in
* the user code, because methods {@link LongHashFunction#hashBytes(Bytes)} and
* {@link LongHashFunction#hashBytes(Bytes, long, long)} exist. This {@code Access} could be
* used in new {@link LongHashFunction} implementations.
*
* @return the {@code Access} to {@link Bytes} instances
*/
public static Access<Bytes> toBytes() {
    // Hand out the shared BytesAccess instance instead of creating a new object per call.
    final Access<Bytes> shared = BytesAccess.INSTANCE;
    return shared;
}

/**
* Returns the {@code Access} delegating {@code getXXX(input, offset)} methods to {@code
* sun.misc.Unsafe.getXXX(input, offset)}.
*
* <p>Usage example: <pre>{@code
* class Pair {
* long first, second;
*
* static final long pairDataOffset =
* theUnsafe.objectFieldOffset(Pair.class.getDeclaredField("first"));
*
* static long hashPair(Pair pair, LongHashFunction hashFunction) {
* return hashFunction.hash(pair, Access.unsafe(), pairDataOffset, 16L);
* }
* }}</pre>
*
* <p>{@code null} is a valid input: on accepting {@code null}, {@code Unsafe} just interprets
* the given offset as a wild memory address. Note that for hashing memory by address there is
* a shortcut {@link LongHashFunction#hashMemory(long, long) hashMemory(address, len)} method.
*
* @param <T> the type of objects to access
* @return the unsafe memory {@code Access}
*/
public static <T> Access<T> unsafe() {
    // The cast below is unchecked: one shared instance is re-typed to the caller's T.
    // NOTE(review): presumably safe because UnsafeAccess does not depend on T at runtime
    // (generics are erased) -- confirm against the UnsafeAccess implementation.
    @SuppressWarnings("unchecked")
    Access<T> access = (Access<T>) UnsafeAccess.INSTANCE;
    return access;
}

/**
* Returns the {@code Access} to any {@link ByteBuffer}. This {@code Access} isn't useful in
* the user code, because methods {@link LongHashFunction#hashBytes(ByteBuffer)} and
* {@link LongHashFunction#hashBytes(ByteBuffer, int, int)} exist. This {@code Access} could be
* used in new {@link LongHashFunction} implementations.
*
* @return the {@code Access} to {@link ByteBuffer}s
*/
public static Access<ByteBuffer> toByteBuffer() {
    // Hand out the shared ByteBufferAccess instance instead of creating a new object per call.
    final Access<ByteBuffer> shared = ByteBufferAccess.INSTANCE;
    return shared;
}

/**
* Returns the {@code Access} to {@link CharSequence}s backed by {@linkplain
* ByteOrder#nativeOrder() native} {@code char} reads, typically from {@code char[]} array.
*
* <p>Usage example:<pre>{@code
* static long hashStringBuffer(StringBuffer buffer, LongHashFunction hashFunction) {
* return hashFunction.hash(buffer, Access.toNativeCharSequence(),
* // * 2L because length is passed in bytes, not chars
* 0L, buffer.length() * 2L);
* }}</pre>
*
* <p>This method is a shortcut for {@code Access.toCharSequence(ByteOrder.nativeOrder())}.
*
* @param <T> the {@code CharSequence} subtype (backed by native {@code char} reads) to access
* @return the {@code Access} to {@link CharSequence}s backed by native {@code char} reads
* @see #toCharSequence(ByteOrder)
*/
public static <T extends CharSequence> Access<T> toNativeCharSequence() {
    // The cast below is unchecked: the instance obtained from CharSequenceAccess is re-typed
    // to the caller's CharSequence subtype T.
    // NOTE(review): presumably safe because the returned access only relies on the
    // CharSequence contract of its input -- confirm against CharSequenceAccess.
    @SuppressWarnings("unchecked")
    Access<T> access = (Access<T>) CharSequenceAccess.nativeCharSequenceAccess();
    return access;
}

/**
* Returns the {@code Access} to {@link CharSequence}s backed by {@code char} reads made in
* the specified byte order.
*
* <p>Usage example:<pre>{@code
* static long hashCharBuffer(CharBuffer buffer, LongHashFunction hashFunction) {
* return hashFunction.hash(buffer, Access.toCharSequence(buffer.order()),
* // * 2L because length is passed in bytes, not chars
* 0L, buffer.length() * 2L);
* }}</pre>
*
* @param backingOrder the byte order of {@code char} reads backing
* {@code CharSequences} to access
* @return the {@code Access} to {@link CharSequence}s backed by {@code char} reads made in
* the specified byte order
* @param <T> the {@code CharSequence} subtype to access
* @see #toNativeCharSequence()
*/
public static <T extends CharSequence> Access<T> toCharSequence(ByteOrder backingOrder) {
    // The cast below is unchecked: the instance obtained from CharSequenceAccess for the
    // requested byte order is re-typed to the caller's CharSequence subtype T.
    // NOTE(review): presumably safe because the returned access only relies on the
    // CharSequence contract of its input -- confirm against CharSequenceAccess.
    @SuppressWarnings("unchecked")
    Access<T> access = (Access<T>) CharSequenceAccess.charSequenceAccess(backingOrder);
    return access;
}

/**
* Constructor for use in subclasses.
*/
protected Access() {
    // Nothing to initialize; the constructor is protected, so it is reachable from subclasses
    // (and this package) but not from arbitrary user code.
}

/**
* Reads {@code [offset, offset + 7]} bytes of the byte sequence represented by the given
Expand All @@ -58,7 +167,13 @@ public interface Access<T> {
* @return eight bytes as a {@code long} value, in {@linkplain #byteOrder(Object) the expected
* order}
*/
long getLong(T input, long offset);
public long getLong(T input, long offset) {
    // Compose the 64-bit value from two unsigned 32-bit halves; the byte order reported for
    // this input decides which half supplies the low bits.
    final boolean littleEndian = byteOrder(input) == LITTLE_ENDIAN;
    final long lowOffset = littleEndian ? offset : offset + 4L;
    final long highOffset = littleEndian ? offset + 4L : offset;
    // The low half is read first, then the high half.
    final long low = getUnsignedInt(input, lowOffset);
    final long high = getUnsignedInt(input, highOffset);
    return low | (high << 32);
}

/**
* Shortcut for {@code getInt(input, offset) & 0xFFFFFFFFL}. Could be implemented more
Expand All @@ -70,7 +185,9 @@ public interface Access<T> {
* @return four bytes as an unsigned int value, in {@linkplain #byteOrder(Object) the expected
* order}
*/
long getUnsignedInt(T input, long offset);
public long getUnsignedInt(T input, long offset) {
    // Widen the signed int to long (sign-extending), then mask off the upper 32 bits so the
    // result is the unsigned interpretation of the same four bytes.
    final long widened = getInt(input, offset);
    return widened & 0xFFFFFFFFL;
}

/**
* Reads {@code [offset, offset + 3]} bytes of the byte sequence represented by the given
Expand All @@ -82,7 +199,13 @@ public interface Access<T> {
* @return four bytes as an {@code int} value, in {@linkplain #byteOrder(Object) the expected
* order}
*/
int getInt(T input, long offset);
public int getInt(T input, long offset) {
    // Compose the 32-bit value from two unsigned 16-bit halves; the byte order reported for
    // this input decides which half supplies the low bits.
    final boolean littleEndian = byteOrder(input) == LITTLE_ENDIAN;
    final long lowOffset = littleEndian ? offset : offset + 2L;
    final long highOffset = littleEndian ? offset + 2L : offset;
    // The low half is read first, then the high half.
    final int low = getUnsignedShort(input, lowOffset);
    final int high = getUnsignedShort(input, highOffset);
    return low | (high << 16);
}

/**
* Shortcut for {@code getShort(input, offset) & 0xFFFF}. Could be implemented more
Expand All @@ -94,7 +217,13 @@ public interface Access<T> {
* @return two bytes as an unsigned short value, in {@linkplain #byteOrder(Object) the expected
* order}
*/
int getUnsignedShort(T input, long offset);
public int getUnsignedShort(T input, long offset) {
    // Compose the 16-bit value from two unsigned bytes; the byte order reported for this
    // input decides which byte supplies the low bits.
    final boolean littleEndian = byteOrder(input) == LITTLE_ENDIAN;
    final long lowOffset = littleEndian ? offset : offset + 1L;
    final long highOffset = littleEndian ? offset + 1L : offset;
    // The low byte is read first, then the high byte.
    final int low = getUnsignedByte(input, lowOffset);
    final int high = getUnsignedByte(input, highOffset);
    return low | (high << 8);
}

/**
* Reads {@code [offset, offset + 1]} bytes of the byte sequence represented by the given
Expand All @@ -106,7 +235,9 @@ public interface Access<T> {
* @return two bytes as a {@code short} value, in {@linkplain #byteOrder(Object) the expected
* order}, widened to {@code int}
*/
int getShort(T input, long offset);
public int getShort(T input, long offset) {
    // Narrow the unsigned 16-bit value to short so that bit 15 becomes the sign bit; the
    // implicit widening on return then sign-extends it back to int.
    final short narrowed = (short) getUnsignedShort(input, offset);
    return narrowed;
}

/**
* Shortcut for {@code getByte(input, offset) & 0xFF}. Could be implemented more efficiently.
Expand All @@ -116,7 +247,9 @@ public interface Access<T> {
* by the given object
* @return a byte by the given {@code offset}, interpreted as unsigned
*/
int getUnsignedByte(T input, long offset);
public int getUnsignedByte(T input, long offset) {
    // Keep only the low eight bits, discarding any sign extension in the widened byte.
    final int widened = getByte(input, offset);
    return widened & 0xFF;
}

/**
* Reads a single byte at the given {@code offset} in the byte sequence represented by the given
Expand All @@ -127,7 +260,7 @@ public interface Access<T> {
* by the given object
* @return a byte by the given {@code offset}, widened to {@code int}
*/
int getByte(T input, long offset);
// Abstract primitive: the default getUnsignedByte() in this class calls it, and the default
// multi-byte reads are in turn composed from getUnsignedByte().
public abstract int getByte(T input, long offset);

/**
* The byte order in which all multi-byte {@code getXXX()} reads from the given {@code input}
Expand All @@ -136,5 +269,5 @@ public interface Access<T> {
* @param input the accessed object
* @return the byte order of all multi-byte reads from the given {@code input}
*/
ByteOrder byteOrder(T input);
// Abstract primitive: the default getLong(), getInt() and getUnsignedShort() in this class
// consult it to decide which part of a multi-byte read supplies the low bits.
public abstract ByteOrder byteOrder(T input);
}
68 changes: 0 additions & 68 deletions src/main/java/net/openhft/chronicle/hash/hashing/Accesses.java

This file was deleted.

Expand Up @@ -19,8 +19,10 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

enum ByteBufferAccess implements Access<ByteBuffer> {
INSTANCE;
final class ByteBufferAccess extends Access<ByteBuffer> {
public static final ByteBufferAccess INSTANCE = new ByteBufferAccess();

private ByteBufferAccess() {}

@Override
public long getLong(ByteBuffer input, long offset) {
Expand Down
Expand Up @@ -20,8 +20,10 @@

import java.nio.ByteOrder;

enum BytesAccess implements Access<Bytes> {
INSTANCE;
public final class BytesAccess extends Access<Bytes> {
public static final BytesAccess INSTANCE = new BytesAccess();

private BytesAccess() {}

@Override
public long getLong(Bytes input, long offset) {
Expand Down Expand Up @@ -62,5 +64,4 @@ public int getByte(Bytes input, long offset) {
public ByteOrder byteOrder(Bytes input) {
return input.byteOrder();
}

}

0 comments on commit 6869783

Please sign in to comment.