Skip to content

Commit

Permalink
New KTypePgmIndex that learns a compact index on sorted keys and supports range search. (#39)
Browse files Browse the repository at this point in the history

Co-authored-by: Dawid Weiss <dawid.weiss@carrotsearch.com>
  • Loading branch information
bruno-roustant and dweiss committed Aug 1, 2023
1 parent db42d3c commit c9497df
Show file tree
Hide file tree
Showing 15 changed files with 1,538 additions and 44 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Expand Up @@ -8,7 +8,7 @@ plugins {

id 'de.thetaphi.forbiddenapis' version '3.2' apply false

id "me.champeau.gradle.jmh" version "0.5.0" apply false
id "me.champeau.gradle.jmh" version "0.5.3" apply false
}

rootProject.version = '0.10.0-SNAPSHOT'
Expand Down
6 changes: 6 additions & 0 deletions hppc-benchmarks/build.gradle
Expand Up @@ -18,6 +18,12 @@ jmh {
duplicateClassesStrategy = DuplicatesStrategy.WARN
}

// Packaging of the JMH benchmark jar: entries named LICENSE and THIRD-PARTY are
// left out, and duplicate entries are handled with the WARN strategy.
jmhJar {
  exclude 'LICENSE', 'THIRD-PARTY'
  duplicatesStrategy = DuplicatesStrategy.WARN
}

task benchmark() {
dependsOn jmhJar

Expand Down
Expand Up @@ -37,6 +37,18 @@ public IntIntMapOps newIntIntMap(int expectedElements, double loadFactor) {
}
},

PGM {
  /**
   * Creates a {@link PgmIntSetOps} with fixed epsilon settings. Neither {@code expectedElements}
   * nor {@code loadFactor} is used by this implementation.
   */
  @Override
  public IntSetOps newIntSet(int expectedElements, double loadFactor) {
    final int epsilon = 64;
    final int recursiveEpsilon = 32;
    return new PgmIntSetOps(epsilon, recursiveEpsilon);
  }

  /** Int-to-int maps are not available for this implementation; always throws. */
  @Override
  public IntIntMapOps newIntIntMap(int expectedElements, double loadFactor) {
    throw new UnsupportedOperationException();
  }
},

FASTUTIL {
@Override
public IntSetOps newIntSet(int expectedElements, double loadFactor) {
Expand Down
@@ -0,0 +1,62 @@
/*
* HPPC
*
* Copyright (C) 2010-2022 Carrot Search s.c.
* All rights reserved.
*
* Refer to the full license file "LICENSE.txt":
* https://github.com/carrotsearch/hppc/blob/master/LICENSE.txt
*/
package com.carrotsearch.hppc.benchmarks.implementations;

import com.carrotsearch.hppc.IntPgmIndex;
import com.carrotsearch.hppc.benchmarks.IntSetOps;
import java.util.Arrays;

/**
 * {@link IntSetOps} benchmark adapter backed by an {@link IntPgmIndex}.
 *
 * <p>The index is built once from a bulk load of keys; single-key insertion is not supported.
 * {@link #bulkAdd(int[])} must be called before any lookup method, because the underlying index
 * does not exist until then.
 */
public class PgmIntSetOps implements IntSetOps {
  /** Pre-configured builder; released (set to {@code null}) once the index has been built. */
  private IntPgmIndex.IntBuilder builder;

  /** Sorted private copy of the keys passed to {@link #bulkAdd(int[])}. */
  private int[] keys;

  /** The built index; {@code null} until {@link #bulkAdd(int[])} has been called. */
  private IntPgmIndex delegate;

  /**
   * @param epsilon value passed to the builder's {@code setEpsilon}.
   * @param recursiveEpsilon value passed to the builder's {@code setEpsilonRecursive}.
   */
  public PgmIntSetOps(int epsilon, int recursiveEpsilon) {
    builder =
        new IntPgmIndex.IntBuilder().setEpsilon(epsilon).setEpsilonRecursive(recursiveEpsilon);
  }

  /** Not supported: keys can only be provided through {@link #bulkAdd(int[])}. */
  @Override
  public void add(int key) {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean contains(int key) {
    return delegate.contains(key);
  }

  /**
   * Builds the index from the given keys. The keys do not need to be sorted.
   *
   * <p>The input array is copied before sorting, so the caller's array keeps its original order
   * (a caller may reuse the same array afterwards, e.g. to drive lookups in that order).
   *
   * @throws UnsupportedOperationException if called more than once.
   */
  @Override
  public void bulkAdd(int[] keys) {
    if (this.keys != null) {
      throw new UnsupportedOperationException("bulkAdd() can be called only once");
    }
    // Sort a private copy rather than the caller's array: sorting in place would silently
    // reorder data owned by the caller.
    final int[] sortedKeys = keys.clone();
    Arrays.sort(sortedKeys);
    this.keys = sortedKeys;
    delegate = builder.setSortedKeys(sortedKeys, sortedKeys.length).build();
    builder = null;
  }

  /** Returns how many of the given keys are found in the index (each array element is counted). */
  @Override
  public int bulkContains(int[] keys) {
    int v = 0;
    for (int key : keys) {
      if (delegate.contains(key)) {
        v++;
      }
    }
    return v;
  }

  /** Returns the internal sorted key array (not a copy), or {@code null} before the bulk load. */
  @Override
  public int[] iterationOrderArray() {
    return keys;
  }
}
Expand Up @@ -137,4 +137,38 @@ public static <T> T add(T op1, T op2) {

throw new UnsupportedOperationException("Invalid for arbitrary types: " + op1 + " " + op2);
}

/**
 * Returns the numerical value for the argument if it is a primitive template type. This intrinsic
 * method always returns a {@code double} result for direct calls, but the template preprocessor
 * will replace this method invocation with the exact type equal to the template type. So a call
 * to:
 *
 * <pre>
 * {@code Intrinsics.<KType>numeric(key)}
 * </pre>
 *
 * with template type {@code KType} equal to {@code int} will return the raw key value (without
 * any type conversion):
 *
 * <pre>
 * {@code (key)}
 * </pre>
 *
 * <p>This intrinsic is used to apply arithmetic operations on keys. It is invalid for generic
 * types.
 *
 * @param e the boxed primitive value to convert.
 * @return the value of {@code e} widened to {@code double}; a {@code Character} yields its UTF-16
 *     code unit value.
 * @throws UnsupportedOperationException if {@code e} is not a boxed primitive numeric type or a
 *     {@code Character} (this includes {@code null}).
 */
public static <T> double numeric(T e) {
  // Character is not a java.lang.Number, so it needs its own conversion.
  if (e instanceof Character) {
    return ((Character) e).charValue();
  }

  // A plain "(double) e" cast only succeeds when e is a Double: a cast from a reference type to
  // double is a checked cast to Double followed by unboxing, so it fails with ClassCastException
  // for Integer, Long, etc. Number.doubleValue() performs the intended widening instead.
  if (e instanceof Byte
      || e instanceof Short
      || e instanceof Integer
      || e instanceof Long
      || e instanceof Float
      || e instanceof Double) {
    return ((Number) e).doubleValue();
  }

  throw new UnsupportedOperationException("Invalid for generic type: " + e);
}
}
Expand Up @@ -21,6 +21,7 @@
import com.carrotsearch.hppc.generator.intrinsics.Equals;
import com.carrotsearch.hppc.generator.intrinsics.IsEmpty;
import com.carrotsearch.hppc.generator.intrinsics.NewArray;
import com.carrotsearch.hppc.generator.intrinsics.Numeric;
import com.carrotsearch.hppc.generator.parser.SignatureProcessor;
import java.io.IOException;
import java.io.StringWriter;
Expand Down Expand Up @@ -58,6 +59,7 @@ public class TemplateProcessor extends Command<ExitCode> {
intrinsics.put("cast", new Cast());
intrinsics.put("add", new Add());
intrinsics.put("equals", new Equals());
intrinsics.put("numeric", new Numeric());
}

@Parameter(names = {"--incremental"})
Expand Down
@@ -0,0 +1,31 @@
/*
* HPPC
*
* Copyright (C) 2010-2022 Carrot Search s.c.
* All rights reserved.
*
* Refer to the full license file "LICENSE.txt":
* https://github.com/carrotsearch/hppc/blob/master/LICENSE.txt
*/
package com.carrotsearch.hppc.generator.intrinsics;

import com.carrotsearch.hppc.generator.TemplateOptions;
import com.carrotsearch.hppc.generator.Type;
import java.util.ArrayList;
import java.util.regex.Matcher;

/**
 * Intrinsic that expands to its single argument unchanged. It is rejected when the inferred
 * template type is generic.
 */
public class Numeric extends AbstractIntrinsicMethod {
  /**
   * Appends the sole argument of the matched call to {@code sb}.
   *
   * @throws RuntimeException if the inferred template type is {@link Type#GENERIC}.
   */
  @Override
  public void invoke(
      Matcher m,
      StringBuilder sb,
      TemplateOptions templateOptions,
      String genericCast,
      ArrayList<String> params) {
    expectArgumentCount(m, params, 1);

    final Type templateType = inferTemplateType(m, templateOptions, genericCast);
    if (templateType != Type.GENERIC) {
      // The replacement is the argument expression itself.
      final String argument = params.get(0);
      sb.append(argument);
      return;
    }

    throw new RuntimeException("Can't get the numeric value of generic types: " + m.group());
  }
}
59 changes: 59 additions & 0 deletions hppc/src/main/java/com/carrotsearch/hppc/IntGrowableArray.java
@@ -0,0 +1,59 @@
/*
* HPPC
*
* Copyright (C) 2010-2022 Carrot Search s.c.
* All rights reserved.
*
* Refer to the full license file "LICENSE.txt":
* https://github.com/carrotsearch/hppc/blob/master/LICENSE.txt
*/
package com.carrotsearch.hppc;

import java.util.Arrays;

/**
 * Minimal growable {@code int} array helper for HPPC templates (so before {@code IntArrayList} is
 * generated).
 */
public class IntGrowableArray implements Accountable {

  /** Backing storage; only the first {@link #size} slots hold added elements. */
  public int[] buffer;

  /** Number of elements added so far. */
  public int size;

  /** Creates an empty array whose backing buffer has the given initial length. */
  public IntGrowableArray(int initialCapacity) {
    this.buffer = new int[initialCapacity];
  }

  /** Appends an element, growing the backing buffer first when it is full. */
  public void add(int e) {
    ensureBufferSpace(1);
    buffer[size++] = e;
  }

  /**
   * Returns the added elements as an array of length {@link #size}. When the backing buffer is
   * exactly full, the buffer itself is returned (no copy is made).
   */
  public int[] toArray() {
    if (size == buffer.length) {
      return buffer;
    }
    return Arrays.copyOf(buffer, size);
  }

  /** Reallocates the backing buffer if it cannot hold {@code expectedAdditions} more elements. */
  private void ensureBufferSpace(int expectedAdditions) {
    final int capacity = buffer.length;
    if (size + expectedAdditions <= capacity) {
      return;
    }
    final int grownCapacity =
        BoundedProportionalArraySizingStrategy.DEFAULT_INSTANCE.grow(
            capacity, size, expectedAdditions);
    buffer = Arrays.copyOf(buffer, grownCapacity);
  }

  @Override
  public long ramBytesAllocated() {
    // Object header + the int "size" field + the whole backing array.
    final long arrayBytes = RamUsageEstimator.shallowSizeOfArray(buffer);
    return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + Integer.BYTES + arrayBytes;
  }

  @Override
  public long ramBytesUsed() {
    // Object header + the int "size" field + the used part of the backing array.
    final long usedArrayBytes = RamUsageEstimator.shallowUsedSizeOfArray(buffer, size);
    return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + Integer.BYTES + usedArrayBytes;
  }
}
122 changes: 122 additions & 0 deletions hppc/src/main/java/com/carrotsearch/hppc/PgmIndexUtil.java
@@ -0,0 +1,122 @@
/*
* HPPC
*
* Copyright (C) 2010-2022 Carrot Search s.c.
* All rights reserved.
*
* Refer to the full license file "LICENSE.txt":
* https://github.com/carrotsearch/hppc/blob/master/LICENSE.txt
*/
package com.carrotsearch.hppc;

/**
 * Utility methods for {@code KTypePgmIndex}.
 *
 * <p>Segment data is stored in an {@code int} array. A value occupies one slot ({@code int},
 * {@code float}) or two consecutive slots ({@code long}, {@code double}; low 32 bits first).
 *
 * <p>The {@code getKey} overloads take a trailing {@code keyType} argument whose value is never
 * read; it only selects the overload (and therefore the decoded type).
 */
final class PgmIndexUtil {

  /** Static utility class; not instantiable. */
  private PgmIndexUtil() {}

  /**
   * Generic-type placeholder for the key encoders; always throws.
   *
   * @throws UnsupportedOperationException always.
   */
  static <KType> void addKey(KType key, IntGrowableArray segmentData) {
    throw new UnsupportedOperationException("Invalid for generic type: " + key);
  }

  /** Appends the first key of the current segment to the segment data (one slot). */
  static void addKey(int key, IntGrowableArray segmentData) {
    segmentData.add(key);
  }

  /**
   * Appends the first key of the current segment to the segment data (one slot holding the bits
   * given by {@link Float#floatToIntBits(float)}).
   */
  static void addKey(float key, IntGrowableArray segmentData) {
    addKey(Float.floatToIntBits(key), segmentData);
  }

  /**
   * Appends the first key of the current segment to the segment data (two slots: low 32 bits,
   * then high 32 bits).
   */
  static void addKey(long key, IntGrowableArray segmentData) {
    segmentData.add((int) key);
    segmentData.add((int) (key >> 32));
  }

  /**
   * Appends the first key of the current segment to the segment data (two slots holding the bits
   * given by {@link Double#doubleToRawLongBits(double)}).
   */
  static void addKey(double key, IntGrowableArray segmentData) {
    addKey(Double.doubleToRawLongBits(key), segmentData);
  }

  /**
   * Generic-type placeholder for the key decoders; always throws.
   *
   * @throws UnsupportedOperationException always.
   */
  static <KType> KType getKey(int segmentDataIndex, int[] segmentData, KType keyType) {
    throw new UnsupportedOperationException("Invalid for generic type: " + keyType);
  }

  /**
   * Gets the first key of the segment at the given data index.
   *
   * @param keyType only selects this overload; its value is ignored.
   */
  static int getKey(int segmentDataIndex, int[] segmentData, int keyType) {
    return segmentData[segmentDataIndex];
  }

  /**
   * Gets the first key of the segment at the given data index.
   *
   * @param keyType only selects this overload; its value is ignored.
   */
  static float getKey(int segmentDataIndex, int[] segmentData, float keyType) {
    return Float.intBitsToFloat(getKey(segmentDataIndex, segmentData, 0));
  }

  /**
   * Gets the first key of the segment at the given data index, reading two consecutive slots.
   *
   * @param keyType only selects this overload; its value is ignored.
   */
  static long getKey(int segmentDataIndex, int[] segmentData, long keyType) {
    // Mask the low word so that its sign bit does not leak into the high 32 bits.
    return (segmentData[segmentDataIndex] & 0xFFFFFFFFL)
        | (((long) segmentData[segmentDataIndex + 1]) << 32);
  }

  /**
   * Gets the first key of the segment at the given data index, reading two consecutive slots.
   *
   * @param keyType only selects this overload; its value is ignored.
   */
  static double getKey(int segmentDataIndex, int[] segmentData, double keyType) {
    return Double.longBitsToDouble(getKey(segmentDataIndex, segmentData, 0L));
  }

  /**
   * Adds the intercept of the current segment to the segment data. The intercept is stored as an
   * int (narrowed by a cast) for a key size equal to 1, otherwise it is stored as a long.
   *
   * @param keySize The size of the key, measured in {@link Integer#BYTES}; must be 1 or 2.
   */
  static void addIntercept(long intercept, IntGrowableArray segmentData, int keySize) {
    assert keySize >= 1 && keySize <= 2;
    if (keySize == 1) {
      addKey((int) intercept, segmentData);
    } else {
      addKey(intercept, segmentData);
    }
  }

  /**
   * Gets the intercept of the segment at the given data index.
   *
   * @param keySize The size of the key, measured in {@link Integer#BYTES}; must be 1 or 2.
   */
  static long getIntercept(int segmentDataIndex, int[] segmentData, int keySize) {
    assert keySize >= 1 && keySize <= 2;
    if (keySize == 1) {
      // Stored as a single int; widened (sign-extended) to long on return.
      return getKey(segmentDataIndex, segmentData, 0);
    }
    return getKey(segmentDataIndex, segmentData, 0L);
  }

  /**
   * Adds the slope of the current segment to the segment data. The slope is stored as a float for
   * a key size equal to 1, otherwise it is stored as a double.
   *
   * @param keySize The size of the key, measured in {@link Integer#BYTES}; must be 1 or 2.
   */
  static void addSlope(double slope, IntGrowableArray segmentData, int keySize) {
    assert keySize >= 1 && keySize <= 2;
    if (keySize == 1) {
      addKey((float) slope, segmentData);
    } else {
      addKey(slope, segmentData);
    }
  }

  /**
   * Gets the slope of the segment at the given data index.
   *
   * @param keySize The size of the key, measured in {@link Integer#BYTES}; must be 1 or 2.
   */
  static double getSlope(int segmentDataIndex, int[] segmentData, int keySize) {
    assert keySize >= 1 && keySize <= 2;
    if (keySize == 1) {
      // Stored as a single float; widened to double on return.
      return getKey(segmentDataIndex, segmentData, 0f);
    }
    return getKey(segmentDataIndex, segmentData, 0d);
  }
}

0 comments on commit c9497df

Please sign in to comment.