Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use custom radix sort for ordering blocks #1277

Merged
merged 6 commits into from Apr 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion buildSrc/build.gradle.kts
Expand Up @@ -54,7 +54,7 @@ dependencies {
implementation("net.ltgt.apt-idea:net.ltgt.apt-idea.gradle.plugin:0.21")
implementation("org.jfrog.buildinfo:build-info-extractor-gradle:4.9.7")
implementation("gradle.plugin.org.spongepowered:spongegradle:0.9.0")
implementation("net.minecraftforge.gradle:ForgeGradle:3.0.159")
implementation("net.minecraftforge.gradle:ForgeGradle:3.0.168")
implementation("net.fabricmc:fabric-loom:$loomVersion")
implementation("net.fabricmc:sponge-mixin:$mixinVersion")
implementation("gradle.plugin.com.mendhak.gradlecrowdin:plugin:0.1.0")
Expand Down
2 changes: 1 addition & 1 deletion buildSrc/src/main/kotlin/Versions.kt
Expand Up @@ -5,7 +5,7 @@ object Versions {
const val TEXT_EXTRAS = "3.0.3"
const val PISTON = "0.5.2"
const val AUTO_VALUE = "1.7"
const val JUNIT = "5.5.0"
const val JUNIT = "5.6.1"
const val MOCKITO = "3.3.3"
const val LOGBACK = "1.2.3"
}
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
@@ -1,7 +1,7 @@
group=com.sk89q.worldedit
version=7.2.0-SNAPSHOT

org.gradle.jvmargs=-Xmx1G
org.gradle.jvmargs=-Xmx1512M

loom.version=0.2.6-20200124.104118-60
mixin.version=0.8+build.17
4 changes: 4 additions & 0 deletions worldedit-core/build.gradle.kts
Expand Up @@ -45,6 +45,10 @@ dependencies {
"testImplementation"("ch.qos.logback:logback-classic:${Versions.LOGBACK}")
}

tasks.named<Test>("test") {
maxHeapSize = "1G"
}

tasks.withType<JavaCompile>().configureEach {
dependsOn(":worldedit-libs:build")
options.compilerArgs.add("-Aarg.name.key.prefix=")
Expand Down
Expand Up @@ -24,13 +24,15 @@
import com.sk89q.worldedit.extent.Extent;
import com.sk89q.worldedit.function.operation.Operation;
import com.sk89q.worldedit.function.operation.RunContext;
import com.sk89q.worldedit.internal.util.RegionOptimizedVectorSorter;
import com.sk89q.worldedit.math.BlockVector3;
import com.sk89q.worldedit.math.RegionOptimizedComparator;
import com.sk89q.worldedit.util.collection.BlockMap;
import com.sk89q.worldedit.world.block.BaseBlock;
import com.sk89q.worldedit.world.block.BlockStateHolder;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
* A special extent that batches changes into Minecraft chunks. This helps
Expand Down Expand Up @@ -91,7 +93,9 @@ protected Operation commitBefore() {
@Override
public Operation resume(RunContext run) throws WorldEditException {
if (iterator == null) {
iterator = blockMap.keySet().parallelStream().sorted(RegionOptimizedComparator.INSTANCE).iterator();
List<BlockVector3> blockVectors = new ArrayList<>(blockMap.keySet());
RegionOptimizedVectorSorter.sort(blockVectors);
iterator = blockVectors.iterator();
}
while (iterator.hasNext()) {
BlockVector3 position = iterator.next();
Expand Down
@@ -0,0 +1,216 @@
/*
* WorldEdit, a Minecraft world manipulation toolkit
* Copyright (C) sk89q <http://www.sk89q.com>
* Copyright (C) WorldEdit team and contributors
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package com.sk89q.worldedit.internal.util;

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.sk89q.worldedit.math.BlockVector3;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import static com.sk89q.worldedit.math.BitMath.mask;

/**
 * Uses a radix sort to order vectors by region, then chunk, then Y value (max -> min).
 *
 * <p>
 * Every vector is reduced to a 64-bit key (see {@code key}), and the keys are sorted with a
 * least-significant-digit radix sort that consumes {@code BITS_PER_SORT} bits per pass. Each
 * pass is a stable counting sort, so vectors with equal keys keep their relative input order.
 * </p>
 */
public final class RegionOptimizedVectorSorter {

    // We need to sort by region file, chunk, and Y (x/z don't really matter).
    // Due to MC having x/z axes of only 60,000,000 blocks, the max value is <=26 bits.
    // We can store the chunk in 4 bits less, 22 bits; and region in 5 bits less than that, 17 bits.
    // If we share the region + chunk bits, we can make a radix key that is really 5 parts:
    // [region X (17)][region Z (17)][chunk X (5)][chunk Z (5)][block Y (20)] = 64 bits
    // Even though we only normally need 8 bits for Y, we might as well use it for cubic chunks
    // compatibility in the future, since we have the room in the long value.
    private static final int CHUNK_Z_SHIFT = 20;
    private static final int CHUNK_X_SHIFT = 5 + CHUNK_Z_SHIFT;
    private static final int REGION_Z_SHIFT = 5 + CHUNK_X_SHIFT;
    private static final int REGION_X_SHIFT = 17 + REGION_Z_SHIFT;
    private static final long REGION_X_MASK = ((long) mask(17)) << REGION_X_SHIFT;
    private static final long REGION_Z_MASK = ((long) mask(17)) << REGION_Z_SHIFT;
    private static final long CHUNK_X_MASK = ((long) mask(5)) << CHUNK_X_SHIFT;
    private static final long CHUNK_Z_MASK = ((long) mask(5)) << CHUNK_Z_SHIFT;
    private static final int Y_MAX = mask(20);
    // We flip the region x/z sign to turn signed numbers into unsigned ones;
    // this allows us to sort on the raw bits, and not care about signs.
    // Essentially it transforms [negative values][positive values]
    // to [positive values][even more positive values], i.e. a shift upwards.
    private static final long FLIP_REGION_X_SIGN = 0x1_00_00L << REGION_X_SHIFT;
    private static final long FLIP_REGION_Z_SIGN = 0x1_00_00L << REGION_Z_SHIFT;

    /**
     * Builds the 64-bit radix key for a vector.
     *
     * <p>
     * Layout, from most to least significant bits:
     * {@code [region X (17)][region Z (17)][chunk X (5)][chunk Z (5)][Y_MAX - Y (20)]}.
     * Storing {@code Y_MAX - Y} makes an ascending sort on the key yield descending Y.
     * </p>
     *
     * @param elem the vector to compute the key for
     * @return the sort key
     */
    private static long key(BlockVector3 elem) {
        long x = elem.getX();
        long z = elem.getZ();
        // Clamp the inverted Y into its 20-bit field. Without this, a Y below 0 produces a value
        // wider than 20 bits, and a Y above Y_MAX produces a negative value that sign-extends;
        // either would be OR-ed into the chunk/region bits and break the grouping.
        // Out-of-range Y values therefore sort as if they were at the nearest bound.
        long invertedY = Math.max(0L, Math.min((long) Y_MAX, (long) Y_MAX - elem.getY()));
        return (((x << (REGION_X_SHIFT - 9)) & REGION_X_MASK) ^ FLIP_REGION_X_SIGN)
            | (((z << (REGION_Z_SHIFT - 9)) & REGION_Z_MASK) ^ FLIP_REGION_Z_SIGN)
            | ((x << (CHUNK_X_SHIFT - 4)) & CHUNK_X_MASK)
            | ((z << (CHUNK_Z_SHIFT - 4)) & CHUNK_Z_MASK)
            | invertedY;
    }

    private static final int NUMBER_OF_BITS = 64;
    private static final int BITS_PER_SORT = 16;
    private static final int MAX_FOR_BPS = 1 << BITS_PER_SORT;
    private static final int MASK_FOR_BPS = (1 << BITS_PER_SORT) - 1;
    private static final int NUMBER_OF_SORTS = NUMBER_OF_BITS / BITS_PER_SORT;
    private static final int NUMBER_OF_CORES = Runtime.getRuntime().availableProcessors();
    // Minimum list size at which sort(List) switches to the parallel implementation.
    static int PARALLELISM_THRESHOLD;

    static {
        if (NUMBER_OF_CORES == 1) {
            // don't even bother
            PARALLELISM_THRESHOLD = Integer.MAX_VALUE;
        } else {
            // Determined via benchmarking serial vs. parallel.
            // Didn't try anything more fine-grained than increments of 100,000.
            PARALLELISM_THRESHOLD = 200000;
        }
    }

    // Sized with NUMBER_OF_CORES so that the pool and the task partitioning in parallelSort
    // always agree on the worker count.
    private static final ExecutorService SORT_SVC = Executors.newFixedThreadPool(
        NUMBER_OF_CORES,
        new ThreadFactoryBuilder()
            .setDaemon(true)
            .setNameFormat("worldedit-sort-svc-%d")
            .build()
    );

    /**
     * Sorts the given vectors, choosing the parallel implementation when the list holds at
     * least {@code PARALLELISM_THRESHOLD} elements.
     *
     * <p>
     * {@code vectors} must be mutable, and will be sorted after this method returns.
     * </p>
     *
     * @param vectors the vectors to sort
     */
    public static void sort(List<BlockVector3> vectors) {
        sort(vectors.size() >= PARALLELISM_THRESHOLD, vectors);
    }

    /**
     * For test purposes, or if you want to finely control when parallelism occurs.
     *
     * <p>
     * {@code vectors} must be mutable, and will be sorted after this method returns.
     * </p>
     *
     * @param parallel {@code true} to sort in parallel
     * @param vectors the vectors to sort
     */
    public static void sort(boolean parallel, List<BlockVector3> vectors) {
        // Currently we don't do an in-place radix sort, but we could in the future.
        int size = vectors.size();
        // take care of some easy cases
        if (size == 0 || size == 1) {
            return;
        }
        BlockVector3[] source = vectors.toArray(new BlockVector3[0]);
        BlockVector3[] scratch = new BlockVector3[size];
        BlockVector3[] result = parallel
            ? parallelSort(source, size, scratch)
            : serialSort(source, size, scratch);
        // Write the sorted elements back through the list's own iterator, position by position.
        ListIterator<BlockVector3> it = vectors.listIterator();
        for (BlockVector3 blockVector3 : result) {
            it.next();
            it.set(blockVector3);
        }
    }

    /**
     * Extracts the {@code BITS_PER_SORT}-bit digit of a vector's key that starts at bit
     * {@code shift}.
     *
     * @param elem the vector whose key is examined
     * @param shift the index of the digit's lowest bit within the key
     * @return the digit, in the range {@code [0, MAX_FOR_BPS)}
     */
    private static int digit(BlockVector3 elem, int shift) {
        return ((int) (key(elem) >>> shift)) & MASK_FOR_BPS;
    }

    /**
     * Radix-sorts {@code source}, computing the per-pass digits and histograms on
     * {@code SORT_SVC}; the scatter step of each pass runs on the calling thread.
     *
     * @param source the elements to sort; used as a working buffer
     * @param size the number of elements
     * @param sorted a second working buffer of length {@code size}
     * @return whichever of the two buffers holds the fully sorted elements
     */
    private static BlockVector3[] parallelSort(BlockVector3[] source, int size, BlockVector3[] sorted) {
        int[][] counts = new int[NUMBER_OF_CORES][MAX_FOR_BPS];
        int[] finalCounts = new int[MAX_FOR_BPS];
        int[] keys = new int[size];
        List<Future<int[]>> tasks = new ArrayList<>(NUMBER_OF_CORES);
        // Each worker handles one contiguous slice of kStep elements (the last may be shorter).
        int kStep = (size + NUMBER_OF_CORES - 1) / NUMBER_OF_CORES;
        for (int p = 0; p < NUMBER_OF_SORTS; p++) {
            BlockVector3[] currentSource = source;
            int shift = BITS_PER_SORT * p;
            for (int c = 0; c < NUMBER_OF_CORES; c++) {
                int[] localCounts = counts[c];
                int kStart = kStep * c;
                int kEnd = Math.min(kStart + kStep, size);
                tasks.add(SORT_SVC.submit(() -> {
                    // Workers touch disjoint slices of keys and their own histogram only.
                    for (int i = kStart; i < kEnd; i++) {
                        int k = digit(currentSource[i], shift);
                        keys[i] = k;
                        localCounts[k]++;
                    }
                    return localCounts;
                }));
            }
            for (Future<int[]> task : tasks) {
                try {
                    task.get();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new IllegalStateException(e);
                } catch (ExecutionException e) {
                    throw new RuntimeException(e);
                }
            }
            // Merge the per-worker histograms and reset them for the next pass.
            for (int c = 0; c < NUMBER_OF_CORES; c++) {
                int[] localCounts = counts[c];
                for (int i = 0; i < MAX_FOR_BPS; i++) {
                    finalCounts[i] += localCounts[i];
                    localCounts[i] = 0;
                }
            }
            tasks.clear();
            copyByCounts(size, source, sorted, keys, finalCounts);
            // Swap buffers: the output of this pass is the input of the next.
            BlockVector3[] temp = source;
            source = sorted;
            sorted = temp;
        }
        // after the loop returns, source is now the final sorted array!
        return source;
    }

    /**
     * Radix-sorts {@code source} on the calling thread.
     *
     * @param source the elements to sort; used as a working buffer
     * @param size the number of elements
     * @param sorted a second working buffer of length {@code size}
     * @return whichever of the two buffers holds the fully sorted elements
     */
    private static BlockVector3[] serialSort(BlockVector3[] source, int size, BlockVector3[] sorted) {
        int[] counts = new int[MAX_FOR_BPS];
        int[] keys = new int[size];
        for (int p = 0; p < NUMBER_OF_SORTS; p++) {
            int shift = BITS_PER_SORT * p;
            for (int i = 0; i < size; i++) {
                int k = digit(source[i], shift);
                keys[i] = k;
                counts[k]++;
            }
            copyByCounts(size, source, sorted, keys, counts);
            // Swap buffers: the output of this pass is the input of the next.
            BlockVector3[] temp = source;
            source = sorted;
            sorted = temp;
        }
        // after the loop returns, source is now the final sorted array!
        return source;
    }

    /**
     * Scatter step of one counting-sort pass: places every element of {@code source} into
     * {@code sorted} according to its digit, then zeroes {@code finalCounts} for reuse.
     *
     * @param size the number of elements
     * @param source the input buffer for this pass
     * @param sorted the output buffer for this pass
     * @param keys the digit of each element of {@code source}, by index
     * @param finalCounts the histogram of {@code keys}; overwritten by this method
     */
    private static void copyByCounts(int size, BlockVector3[] source, BlockVector3[] sorted, int[] keys, int[] finalCounts) {
        // Turn the histogram into inclusive prefix sums: finalCounts[d] becomes the index one
        // past the last slot reserved for digit d.
        int lastCount = finalCounts[0];
        for (int i = 1; i < MAX_FOR_BPS; i++) {
            lastCount = (finalCounts[i] += lastCount);
        }
        // Walk backwards and fill each digit's slots from the end, which keeps elements with
        // the same digit in their original relative order.
        for (int i = size - 1; i >= 0; i--) {
            int key = keys[i];
            int count = --finalCounts[key];
            sorted[count] = source[i];
        }
        Arrays.fill(finalCounts, 0);
    }

    private RegionOptimizedVectorSorter() {
    }
}