diff --git a/backend.native/cli.bc/src/org/jetbrains/kotlin/cli/bc/K2Native.kt b/backend.native/cli.bc/src/org/jetbrains/kotlin/cli/bc/K2Native.kt index 3163a532f4f..81a4e22c729 100644 --- a/backend.native/cli.bc/src/org/jetbrains/kotlin/cli/bc/K2Native.kt +++ b/backend.native/cli.bc/src/org/jetbrains/kotlin/cli/bc/K2Native.kt @@ -153,6 +153,7 @@ class K2Native : CLICompiler<K2NativeCompilerArguments>() { put(LIGHT_DEBUG, arguments.lightDebug) put(STATIC_FRAMEWORK, selectFrameworkType(configuration, arguments, outputKind)) put(OVERRIDE_CLANG_OPTIONS, arguments.clangOptions.toNonNullList()) + put(ALLOCATION_MODE, arguments.allocator) put(PRINT_IR, arguments.printIr) put(PRINT_IR_WITH_DESCRIPTORS, arguments.printIrWithDescriptors) diff --git a/backend.native/cli.bc/src/org/jetbrains/kotlin/cli/bc/K2NativeCompilerArguments.kt b/backend.native/cli.bc/src/org/jetbrains/kotlin/cli/bc/K2NativeCompilerArguments.kt index 173bc1d478d..ebcc4cd4595 100644 --- a/backend.native/cli.bc/src/org/jetbrains/kotlin/cli/bc/K2NativeCompilerArguments.kt +++ b/backend.native/cli.bc/src/org/jetbrains/kotlin/cli/bc/K2NativeCompilerArguments.kt @@ -242,6 +242,9 @@ class K2NativeCompilerArguments : CommonCompilerArguments() { @Argument(value="-Xoverride-clang-options", valueDescription = "", description = "Explicit list of Clang options") var clangOptions: Array<String>? = null + @Argument(value="-Xallocator", valueDescription = "std | mimalloc", description = "Allocator used in runtime") + var allocator: String = "std" + override fun configureAnalysisFlags(collector: MessageCollector): MutableMap<AnalysisFlag<*>, Any> = super.configureAnalysisFlags(collector).also { val useExperimental = it[AnalysisFlags.useExperimental] as List<*> diff --git a/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/KonanConfig.kt b/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/KonanConfig.kt index ba9fd28ac12..6fb3a1de34d 100644 --- a/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/KonanConfig.kt +++ b/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/KonanConfig.kt @@ -13,20 +13,13 @@ import org.jetbrains.kotlin.config.CommonConfigurationKeys import org.jetbrains.kotlin.config.CompilerConfiguration import org.jetbrains.kotlin.descriptors.ModuleDescriptor import org.jetbrains.kotlin.konan.CURRENT +import org.jetbrains.kotlin.konan.CompilerVersion import org.jetbrains.kotlin.konan.MetaVersion import org.jetbrains.kotlin.konan.TempFiles import org.jetbrains.kotlin.konan.file.File import org.jetbrains.kotlin.konan.library.KonanLibrary import org.jetbrains.kotlin.konan.properties.loadProperties -import org.jetbrains.kotlin.konan.target.Distribution -import org.jetbrains.kotlin.konan.target.HostManager -import org.jetbrains.kotlin.konan.target.KonanTarget -import org.jetbrains.kotlin.konan.target.PlatformManager import org.jetbrains.kotlin.konan.target.* -import org.jetbrains.kotlin.util.Logger -import kotlin.system.exitProcess -import org.jetbrains.kotlin.library.toUnresolvedLibraries -import org.jetbrains.kotlin.konan.CompilerVersion import org.jetbrains.kotlin.library.KotlinLibrary import org.jetbrains.kotlin.library.resolver.TopologicalLibraryOrder @@ -116,6 +109,18 @@ class KonanConfig(val project: Project, val configuration: CompilerConfiguration add(if (debug) "debug.bc" else "release.bc") add(if (memoryModel == MemoryModel.STRICT) "strict.bc" else "relaxed.bc") if (shouldCoverLibraries || shouldCoverSources) add("profileRuntime.bc") + if
(configuration.get(KonanConfigKeys.ALLOCATION_MODE) == "mimalloc") { + if (!target.supportsMimallocAllocator()) { + configuration.report(CompilerMessageSeverity.STRONG_WARNING, + "Mimalloc allocator isn't supported on target ${target.name}. Falling back to the standard allocator.") + add("std_alloc.bc") + } else { + add("opt_alloc.bc") + add("mimalloc.bc") + } + } else { + add("std_alloc.bc") + } }.map { File(distribution.defaultNatives(target)).child(it).absolutePath } diff --git a/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/KonanConfigurationKeys.kt b/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/KonanConfigurationKeys.kt index b26c5af4d00..5e07cbecb82 100644 --- a/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/KonanConfigurationKeys.kt +++ b/backend.native/compiler/ir/backend.native/src/org/jetbrains/kotlin/backend/konan/KonanConfigurationKeys.kt @@ -88,6 +88,8 @@ class KonanConfigKeys { = CompilerConfigurationKey.create("program or library name") val OVERRIDE_CLANG_OPTIONS: CompilerConfigurationKey<List<String>> = CompilerConfigurationKey.create("arguments for clang") + val ALLOCATION_MODE: CompilerConfigurationKey<String> + = CompilerConfigurationKey.create("allocation mode") val PRINT_BITCODE: CompilerConfigurationKey<Boolean> = CompilerConfigurationKey.create("print bitcode") val PRINT_DESCRIPTORS: CompilerConfigurationKey<Boolean> diff --git a/backend.native/tests/build.gradle b/backend.native/tests/build.gradle index 3c2ce4608b7..83c3569a812 100644 --- a/backend.native/tests/build.gradle +++ b/backend.native/tests/build.gradle @@ -317,8 +317,8 @@ def createTestTasks(File testRoot, Class taskType, Closure taskConfigurati void dependsOnPlatformLibs(Task t) { if (!useCustomDist) { - def testTarget = project.testTarget - if (testTarget != null && testTarget != project.hostName) { + def testTarget = project.target + if (testTarget != project.platformManager.Companion.host) { t.dependsOn(":${testTarget}PlatformLibs") } else { t.dependsOn(':distPlatformLibs') diff --git a/build-tools/src/main/groovy/org/jetbrains/kotlin/CompileToBitcode.groovy b/build-tools/src/main/groovy/org/jetbrains/kotlin/CompileToBitcode.groovy deleted file mode 100644 index 8191f27d4c0..00000000000 --- a/build-tools/src/main/groovy/org/jetbrains/kotlin/CompileToBitcode.groovy +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright 2010-2017 JetBrains s.r.o. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -package org.jetbrains.kotlin - -import org.gradle.api.DefaultTask -import org.gradle.api.tasks.* -import org.gradle.api.tasks.InputDirectory -import org.gradle.api.tasks.OutputFile -import org.gradle.api.tasks.TaskAction -import org.jetbrains.kotlin.konan.target.Family -import org.jetbrains.kotlin.konan.target.HostManager -import org.jetbrains.kotlin.konan.target.KonanTarget - -class CompileCppToBitcode extends DefaultTask { - private String name = "main" - private String target = "host" - private File srcRoot; - - protected List compilerArgs = [] - protected List linkerArgs = [] - - @InputDirectory - File getSrcRoot() { - return srcRoot ?: project.file("src/$name") - } - - @OutputFile - File getOutFile() { - return new File(getTargetDir(), "${name}.bc") - } - - private File getSrcDir() { - return new File(this.getSrcRoot(), "cpp") - } - - private File getHeadersDir() { - return new File(this.getSrcRoot(), "headers") - } - - private File getTargetDir() { - return new File(project.buildDir, target) - } - - private File getObjDir() { - return new File(getTargetDir(), name) - } - - void name(String value) { - name = value - } - - void target(String value) { - target = value - } - - void srcRoot(File value) { - srcRoot = value - } - - protected List getCompilerArgs() { - return compilerArgs - } - - protected List getLinkerArgs() { - return linkerArgs - } - - protected String getTarget() { - return target - } - - void compilerArgs(String... args) { - compilerArgs.addAll(args) - } - - void compilerArgs(List args) { - compilerArgs.addAll(args) - } - - void linkerArgs(String... args) { - linkerArgs.addAll(args) - } - - void linkerArgs(List args) { - linkerArgs.addAll(args) - } - - private Boolean targetingMinGW() { - def hostManager = new HostManager() - return hostManager.targetByName(this.target).family == Family.MINGW - } - - @TaskAction - void compile() { - // the strange code below seems to be required due to some Gradle (Groovy?) behaviour - File headersDir = this.getHeadersDir() - File srcDir = this.getSrcDir() - List compilerArgs = this.getCompilerArgs() - List linkerArgs = this.getLinkerArgs() - File objDir = this.getObjDir() - objDir.mkdirs() - Boolean targetingMinGW = this.targetingMinGW() - - project.execKonanClang(this.target) { - workingDir objDir - executable "clang++" - args '-std=c++14' - args '-Werror' - args '-O2' - if (!targetingMinGW) { - args '-fPIC' - } - args compilerArgs - - args "-I$headersDir" - args '-c', '-emit-llvm' - args project.fileTree(srcDir) { - include('**/*.cpp') - include('**/*.mm') // Objective-C++ - } - } - - project.exec { - executable "$project.llvmDir/bin/llvm-link" - args project.fileTree(objDir).include('**/*.bc').sort { a, b -> (a.name <=> b.name) } - - args linkerArgs - - args '-o', outFile - } - } -} diff --git a/build-tools/src/main/kotlin/org/jetbrains/kotlin/CompileToBitcode.kt b/build-tools/src/main/kotlin/org/jetbrains/kotlin/CompileToBitcode.kt new file mode 100644 index 00000000000..18e8c23da3a --- /dev/null +++ b/build-tools/src/main/kotlin/org/jetbrains/kotlin/CompileToBitcode.kt @@ -0,0 +1,97 @@ +/* + * Copyright 2010-2017 JetBrains s.r.o. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.jetbrains.kotlin + +import org.gradle.api.Action +import org.gradle.api.DefaultTask +import org.gradle.api.tasks.InputDirectory +import org.gradle.api.tasks.OutputFile +import org.gradle.api.tasks.TaskAction +import org.jetbrains.kotlin.konan.target.Family +import org.jetbrains.kotlin.konan.target.HostManager +import org.jetbrains.kotlin.konan.target.KonanTarget + +import java.io.File +import javax.inject.Inject + +open class CompileToBitcode @Inject constructor(@InputDirectory val srcRoot: File, + val folderName: String, + val target: String) : DefaultTask() { + enum class Language { + C, CPP + } + + val compilerArgs = mutableListOf<String>() + val linkerArgs = mutableListOf<String>() + val excludeFiles = mutableListOf<String>() + var srcDir = File(srcRoot, "cpp") + var headersDir = File(srcRoot, "headers") + var skipLinkagePhase = false + var excludedTargets = mutableListOf<String>() + var language = Language.CPP + + private val targetDir by lazy { File(project.buildDir, target) } + + private val objDir by lazy { File(targetDir, folderName) } + + private val KonanTarget.isMINGW + get() = this.family == Family.MINGW + + @OutputFile + val outFile = File(targetDir, "${folderName}.bc") + + @TaskAction + fun compile() { + if (target in excludedTargets) return + objDir.mkdirs() + val plugin = project.convention.getPlugin(ExecClang::class.java) + val commonFlags = listOf("-c", "-emit-llvm", "-I$headersDir") + val (executable, defaultFlags, srcFilesPatterns) = + when (language) { + Language.C -> Triple("clang", + // Used flags provided by original build of allocator C code.
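+ // (in particular, -ftls-model=initial-exec below matches mimalloc's own build settings and keeps access to its thread-local heaps cheap)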
+ commonFlags + listOf("-std=gnu11", "-O3", "-Wall", "-Wextra", "-Wno-unknown-pragmas", + "-ftls-model=initial-exec"), + listOf("**/*.c")) + Language.CPP -> Triple("clang++", + commonFlags + listOfNotNull("-std=c++14", "-Werror", "-O2", + "-fPIC".takeIf { !HostManager().targetByName(target).isMINGW }), + listOf("**/*.cpp", "**/*.mm")) + } + + plugin.execKonanClang(target, Action { + it.workingDir = objDir + it.executable = executable + it.args = defaultFlags + compilerArgs + + project.fileTree(srcDir) { + it.include(srcFilesPatterns) + it.exclude(excludeFiles) + }.files.map { it.absolutePath } + }) + + if (!skipLinkagePhase) { + project.exec { + val llvmDir = project.findProperty("llvmDir") + it.executable = "$llvmDir/bin/llvm-link" + it.args = listOf("-o", outFile.absolutePath) + linkerArgs + + project.fileTree(objDir) { + it.include("**/*.bc") + }.files.map { it.absolutePath } + } + } + } +} diff --git a/build-tools/src/main/kotlin/org/jetbrains/kotlin/Utils.kt b/build-tools/src/main/kotlin/org/jetbrains/kotlin/Utils.kt index 45507d13597..86afb4915c5 100644 --- a/build-tools/src/main/kotlin/org/jetbrains/kotlin/Utils.kt +++ b/build-tools/src/main/kotlin/org/jetbrains/kotlin/Utils.kt @@ -244,4 +244,7 @@ fun compileSwift(project: Project, target: KonanTarget, sources: List, o """.trimMargin()) check(exitCode == 0, { "Compilation failed" }) check(output.toFile().exists(), { "Compiler swiftc hasn't produced an output file: $output" }) -} \ No newline at end of file +} + +fun targetSupportsMimallocAllocator(targetName: String) = + HostManager().targetByName(targetName).supportsMimallocAllocator() \ No newline at end of file diff --git a/common/build.gradle b/common/build.gradle index c20d29ee972..14307301bd8 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -2,22 +2,16 @@ * Copyright 2010-2018 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license * that can be found in the LICENSE file. */ -import org.jetbrains.kotlin.CompileCppToBitcode +import org.jetbrains.kotlin.CompileToBitcode // TODO: consider using some Gradle plugins to build and test targetList.each { targetName -> - task ("${targetName}Hash", type: CompileCppToBitcode) { - name 'hash' - target targetName - } + tasks.create("${targetName}Hash", CompileToBitcode, file("src/hash"), 'hash', targetName) } targetList.each { targetName -> - task ("${targetName}Files", type: CompileCppToBitcode) { - name 'files' - target targetName - } + tasks.create("${targetName}Files", CompileToBitcode, file("src/files"), 'files', targetName) } task build { diff --git a/runtime/build.gradle b/runtime/build.gradle index 9e2bc0d5694..2275ee5ec1c 100644 --- a/runtime/build.gradle +++ b/runtime/build.gradle @@ -1,21 +1,23 @@ /* - * Copyright 2010-2018 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * Copyright 2010-2019 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license * that can be found in the LICENSE file. 
*/ -import org.jetbrains.kotlin.CompileCppToBitcode +import org.jetbrains.kotlin.CompileToBitcode +import org.jetbrains.kotlin.UtilsKt // TODO: consider using some Gradle plugins to build and test -void includeRuntime(CompileCppToBitcode task) { - task.compilerArgs '-I' + project.file('../common/src/hash/headers') - task.compilerArgs '-I' + project.file('src/main/cpp') +void includeRuntime(CompileToBitcode task) { + task.compilerArgs.add('-I' + project.file('../common/src/hash/headers')) + task.compilerArgs.add('-I' + project.file('src/main/cpp')) } targetList.each { targetName -> - task("${targetName}Runtime", type: CompileCppToBitcode) { - name "runtime" - srcRoot file('src/main') + tasks.create("${targetName}Runtime", CompileToBitcode, file('src/main'), "runtime", targetName).configure { dependsOn ":common:${targetName}Hash" + dependsOn "${targetName}StdAlloc" + dependsOn "${targetName}OptAlloc" + dependsOn "${targetName}Mimalloc" dependsOn "${targetName}Launcher" dependsOn "${targetName}Debug" dependsOn "${targetName}Release" @@ -24,63 +26,53 @@ targetList.each { targetName -> dependsOn "${targetName}ProfileRuntime" dependsOn "${targetName}ObjC" dependsOn "${targetName}ExceptionsSupport" - target targetName includeRuntime(delegate) - linkerArgs project.file("../common/build/$targetName/hash.bc").path + linkerArgs.add(project.file("../common/build/$targetName/hash.bc").path) } - task("${targetName}Launcher", type: CompileCppToBitcode) { - name "launcher" - srcRoot file('src/launcher') - target targetName - includeRuntime(delegate) + tasks.create("${targetName}Mimalloc", CompileToBitcode, file('src/mimalloc'), "mimalloc", targetName).configure { + language = CompileToBitcode.Language.C + excludeFiles.addAll(["**/alloc-override*.c", "**/page-queue.c", "**/static.c"]) + if (!UtilsKt.targetSupportsMimallocAllocator(targetName)) + excludedTargets.add(targetName) + srcDir = new File(srcRoot, "c") + compilerArgs.add("-DKONAN_MI_MALLOC=1") + headersDir = new File(srcDir, "include") } - task ("${targetName}Debug", type: CompileCppToBitcode) { - name "debug" - srcRoot file('src/debug') - target targetName + tasks.create("${targetName}Launcher", CompileToBitcode, file('src/launcher'), "launcher", targetName).configure { includeRuntime(delegate) } - task ("${targetName}ExceptionsSupport", type: CompileCppToBitcode) { - name "exceptionsSupport" - srcRoot file('src/exceptions_support') - target targetName + tasks.create("${targetName}Debug", CompileToBitcode, file('src/debug'), "debug", targetName).configure { includeRuntime(delegate) } - task ("${targetName}Release", type: CompileCppToBitcode) { - name "release" - srcRoot file('src/release') - target targetName + tasks.create("${targetName}StdAlloc", CompileToBitcode, file('src/std_alloc'), "std_alloc", targetName) + + tasks.create("${targetName}OptAlloc", CompileToBitcode, file('src/opt_alloc'), "opt_alloc", targetName) + + tasks.create("${targetName}ExceptionsSupport", CompileToBitcode, file('src/exceptions_support'), + "exceptionsSupport", targetName).configure { includeRuntime(delegate) } - task ("${targetName}Strict", type: CompileCppToBitcode) { - name "strict" - srcRoot file('src/strict') - target targetName + tasks.create("${targetName}Release", CompileToBitcode, file('src/release'), "release", targetName).configure { includeRuntime(delegate) } - task ("${targetName}Relaxed", type: CompileCppToBitcode) { - name "relaxed" - srcRoot file('src/relaxed') - target targetName + tasks.create("${targetName}Strict", CompileToBitcode, 
file('src/strict'), "strict", targetName).configure { includeRuntime(delegate) } - task ("${targetName}ProfileRuntime", type: CompileCppToBitcode) { - name "profileRuntime" - srcRoot file('src/profile_runtime') - target targetName + tasks.create("${targetName}Relaxed", CompileToBitcode, file('src/relaxed'), "relaxed", targetName).configure { + includeRuntime(delegate) } - task ("${targetName}ObjC", type: CompileCppToBitcode) { - name "objc" - srcRoot file('src/objc') - target targetName + tasks.create("${targetName}ProfileRuntime", CompileToBitcode, file('src/profile_runtime'), + "profileRuntime", targetName) + + tasks.create("${targetName}ObjC", CompileToBitcode, file('src/objc'), "objc", targetName).configure { includeRuntime(delegate) } } diff --git a/runtime/src/main/cpp/Porting.cpp b/runtime/src/main/cpp/Porting.cpp index 21428161691..315700c8925 100644 --- a/runtime/src/main/cpp/Porting.cpp +++ b/runtime/src/main/cpp/Porting.cpp @@ -216,8 +216,10 @@ extern "C" void dlfree(void*); #define calloc_impl dlcalloc #define free_impl dlfree #else -#define calloc_impl ::calloc -#define free_impl ::free +extern "C" void* konan_calloc_impl(size_t, size_t); +extern "C" void konan_free_impl(void*); +#define calloc_impl konan_calloc_impl +#define free_impl konan_free_impl #endif void* calloc(size_t count, size_t size) { diff --git a/runtime/src/mimalloc/c/LICENSE b/runtime/src/mimalloc/c/LICENSE new file mode 100644 index 00000000000..4151dbe4ab1 --- /dev/null +++ b/runtime/src/mimalloc/c/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Microsoft Corporation, Daan Leijen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/runtime/src/mimalloc/c/alloc-aligned.c b/runtime/src/mimalloc/c/alloc-aligned.c new file mode 100644 index 00000000000..5a59a63ab26 --- /dev/null +++ b/runtime/src/mimalloc/c/alloc-aligned.c @@ -0,0 +1,204 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memset, memcpy + +// ------------------------------------------------------ +// Aligned Allocation +// ------------------------------------------------------ + +static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { + // note: we don't require `size > offset`, we just guarantee that + // the address at offset is aligned regardless of the allocated size. + mi_assert(alignment > 0 && alignment % sizeof(void*) == 0); + if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see ) + if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see ) + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + + // try if there is a small block available with just the right alignment + if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { + mi_page_t* page = _mi_heap_get_free_small_page(heap,size); + const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; + if (mi_likely(page->free != NULL && is_aligned)) + { + #if MI_STAT>1 + mi_heap_stat_increase( heap, malloc, size); + #endif + void* p = _mi_page_malloc(heap,page,size); // TODO: inline _mi_page_malloc + mi_assert_internal(p != NULL); + mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); + if (zero) _mi_block_zero_init(page,p,size); + return p; + } + } + + // use regular allocation if it is guaranteed to fit the alignment constraints + if (offset==0 && alignment<=size && size<=MI_MEDIUM_OBJ_SIZE_MAX && (size&align_mask)==0) { + void* p = _mi_heap_malloc_zero(heap, size, zero); + mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); + return p; + } + + // otherwise over-allocate + void* p = _mi_heap_malloc_zero(heap, size + alignment - 1, zero); + if (p == NULL) return NULL; + + // .. and align within the allocation + uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); + mi_assert_internal(adjust % sizeof(uintptr_t) == 0); + void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); + if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + mi_assert_internal( p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p),_mi_ptr_page(aligned_p),aligned_p) ); + return aligned_p; +} + + +mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); +} + +mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_malloc_aligned_at(heap, size, alignment, 0); +} + +mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); +} + +mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); +} + +mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + size_t total; + if (mi_mul_overflow(count, size, &total)) return NULL; + return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); +} + +mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); +} + +mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); +} + +mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); +} + +mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); +} + +mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); +} + +mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); +} + +mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); +} + + +static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept { + mi_assert(alignment > 0); + if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); + if (p == NULL) return mi_heap_malloc_zero_aligned_at(heap,newsize,alignment,offset,zero); + size_t size = mi_usable_size(p); + if (newsize <= size && newsize >= (size - (size / 2)) + && (((uintptr_t)p + offset) % alignment) == 0) { + return p; // reallocation still fits, is aligned and not more than 50% waste + } + else { + void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); + if (newp != NULL) { + if (zero && newsize > size) { + const mi_page_t* page = _mi_ptr_page(newp); + if (page->is_zero) { 
+ // already zero initialized + mi_assert_expensive(mi_mem_is_zero(newp,newsize)); + } + else { + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + memset((uint8_t*)newp + start, 0, newsize - start); + } + } + memcpy(newp, p, (newsize > size ? size : newsize)); + mi_free(p); // only free if successful + } + return newp; + } +} + +static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept { + mi_assert(alignment > 0); + if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); + size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL) + return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); +} + +mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); +} + +mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); +} + +mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); +} + +mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); +} + +mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + size_t total; + if (mi_mul_overflow(newcount, size, &total)) return NULL; + return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); +} + +mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { + size_t total; + if (mi_mul_overflow(newcount, size, &total)) return NULL; + return mi_heap_rezalloc_aligned(heap, p, total, alignment); +} + +mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); +} + +mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); +} + +mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); +} + +mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); +} + +mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); +} + +mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) 
mi_attr_noexcept { + return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); +} + diff --git a/runtime/src/mimalloc/c/alloc-override-osx.c b/runtime/src/mimalloc/c/alloc-override-osx.c new file mode 100644 index 00000000000..fef4b929f4e --- /dev/null +++ b/runtime/src/mimalloc/c/alloc-override-osx.c @@ -0,0 +1,232 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#if !KONAN_MI_MALLOC +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#if defined(MI_MALLOC_OVERRIDE) + +#if !defined(__APPLE__) +#error "this file should only be included on macOS" +#endif + +/* ------------------------------------------------------ + Override system malloc on macOS + This is done through the malloc zone interface. +------------------------------------------------------ */ + +#include +#include +#include // memset + +#if defined(MAC_OS_X_VERSION_10_6) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 +// only available from OSX 10.6 +extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); +#endif + + +/* ------------------------------------------------------ + malloc zone members +------------------------------------------------------ */ + +static size_t zone_size(malloc_zone_t* zone, const void* p) { + return 0; // as we cannot guarantee that `p` comes from us, just return 0 +} + +static void* zone_malloc(malloc_zone_t* zone, size_t size) { + return mi_malloc(size); +} + +static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { + return mi_calloc(count, size); +} + +static void* zone_valloc(malloc_zone_t* zone, size_t size) { + return mi_malloc_aligned(size, _mi_os_page_size()); +} + +static void zone_free(malloc_zone_t* zone, void* p) { + return mi_free(p); +} + +static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { + return mi_realloc(p, newsize); +} + +static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { + return mi_malloc_aligned(size,alignment); +} + +static void zone_destroy(malloc_zone_t* zone) { + // todo: ignore for now? 
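+ // nothing to tear down: the registered zone is a process-lifetime static (see _mi_macos_override_malloc below)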
+} + +static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { + size_t i; + for (i = 0; i < count; i++) { + ps[i] = zone_malloc(zone, size); + if (ps[i] == NULL) break; + } + return i; +} + +static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { + for(size_t i = 0; i < count; i++) { + zone_free(zone, ps[i]); + ps[i] = NULL; + } +} + +static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { + mi_collect(false); + return 0; +} + +static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { + zone_free(zone,p); +} + + +/* ------------------------------------------------------ + Introspection members +------------------------------------------------------ */ + +static kern_return_t intro_enumerator(task_t task, void* p, + unsigned type_mask, vm_address_t zone_address, + memory_reader_t reader, + vm_range_recorder_t recorder) +{ + // todo: enumerate all memory + return KERN_SUCCESS; +} + +static size_t intro_good_size(malloc_zone_t* zone, size_t size) { + return mi_good_size(size); +} + +static boolean_t intro_check(malloc_zone_t* zone) { + return true; +} + +static void intro_print(malloc_zone_t* zone, boolean_t verbose) { + mi_stats_print(NULL); +} + +static void intro_log(malloc_zone_t* zone, void* p) { + // todo? +} + +static void intro_force_lock(malloc_zone_t* zone) { + // todo? +} + +static void intro_force_unlock(malloc_zone_t* zone) { + // todo? +} + +static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { + // todo... + stats->blocks_in_use = 0; + stats->size_in_use = 0; + stats->max_size_in_use = 0; + stats->size_allocated = 0; +} + +static boolean_t intro_zone_locked(malloc_zone_t* zone) { + return false; +} + + +/* ------------------------------------------------------ + At process start, override the default allocator +------------------------------------------------------ */ + +static malloc_zone_t* mi_get_default_zone() +{ + // The first returned zone is the real default + malloc_zone_t** zones = NULL; + unsigned count = 0; + kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); + if (ret == KERN_SUCCESS && count > 0) { + return zones[0]; + } + else { + // fallback + return malloc_default_zone(); + } +} + + +static void __attribute__((constructor)) _mi_macos_override_malloc() +{ + static malloc_introspection_t intro; + memset(&intro, 0, sizeof(intro)); + + intro.enumerator = &intro_enumerator; + intro.good_size = &intro_good_size; + intro.check = &intro_check; + intro.print = &intro_print; + intro.log = &intro_log; + intro.force_lock = &intro_force_lock; + intro.force_unlock = &intro_force_unlock; + + static malloc_zone_t zone; + memset(&zone, 0, sizeof(zone)); + + zone.version = 4; + zone.zone_name = "mimalloc"; + zone.size = &zone_size; + zone.introspect = &intro; + zone.malloc = &zone_malloc; + zone.calloc = &zone_calloc; + zone.valloc = &zone_valloc; + zone.free = &zone_free; + zone.realloc = &zone_realloc; + zone.destroy = &zone_destroy; + zone.batch_malloc = &zone_batch_malloc; + zone.batch_free = &zone_batch_free; + + malloc_zone_t* purgeable_zone = NULL; + +#if defined(MAC_OS_X_VERSION_10_6) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 + // switch to version 9 on OSX 10.6 to support memalign. 
+ zone.version = 9; + zone.memalign = &zone_memalign; + zone.free_definite_size = &zone_free_definite_size; + zone.pressure_relief = &zone_pressure_relief; + intro.zone_locked = &intro_zone_locked; + + // force the purgeable zone to exist to avoid strange bugs + if (malloc_default_purgeable_zone) { + purgeable_zone = malloc_default_purgeable_zone(); + } +#endif + + // Register our zone + malloc_zone_register(&zone); + + // Unregister the default zone, this makes our zone the new default + // as that was the last registered. + malloc_zone_t *default_zone = mi_get_default_zone(); + malloc_zone_unregister(default_zone); + + // Reregister the default zone so free and realloc in that zone keep working. + malloc_zone_register(default_zone); + + // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs + // earlier than the default zone. + if (purgeable_zone != NULL) { + malloc_zone_unregister(purgeable_zone); + malloc_zone_register(purgeable_zone); + } +} + +#endif // MI_MALLOC_OVERRIDE +#endif \ No newline at end of file diff --git a/runtime/src/mimalloc/c/alloc-override.c b/runtime/src/mimalloc/c/alloc-override.c new file mode 100644 index 00000000000..3bde565ab35 --- /dev/null +++ b/runtime/src/mimalloc/c/alloc-override.c @@ -0,0 +1,197 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#if !KONAN_MI_MALLOC +#if !defined(MI_IN_ALLOC_C) +#error "this file should be included from 'alloc.c' (so aliases can work)" +#endif + +#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL)) +#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)" +#endif + +#if defined(MI_MALLOC_OVERRIDE) && !defined(_WIN32) + +// ------------------------------------------------------ +// Override system malloc +// ------------------------------------------------------ + +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__) + // use aliasing to alias the exported function to one of our `mi_` functions + #if (defined(__GNUC__) && __GNUC__ >= 9) + #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun))) + #else + #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"))) + #endif + #define MI_FORWARD1(fun,x) MI_FORWARD(fun) + #define MI_FORWARD2(fun,x,y) MI_FORWARD(fun) + #define MI_FORWARD3(fun,x,y,z) MI_FORWARD(fun) + #define MI_FORWARD0(fun,x) MI_FORWARD(fun) + #define MI_FORWARD02(fun,x,y) MI_FORWARD(fun) +#else + // use forwarding by calling our `mi_` function + #define MI_FORWARD1(fun,x) { return fun(x); } + #define MI_FORWARD2(fun,x,y) { return fun(x,y); } + #define MI_FORWARD3(fun,x,y,z) { return fun(x,y,z); } + #define MI_FORWARD0(fun,x) { fun(x); } + #define MI_FORWARD02(fun,x,y) { fun(x,y); } +#endif + +#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE) + // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` + // See: + struct mi_interpose_s { + const void* replacement; + const void* target; + }; + #define MI_INTERPOSEX(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } + #define 
MI_INTERPOSE_MI(fun) MI_INTERPOSEX(fun,mi_##fun) + __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = + { + MI_INTERPOSE_MI(malloc), + MI_INTERPOSE_MI(calloc), + MI_INTERPOSE_MI(realloc), + MI_INTERPOSE_MI(free), + MI_INTERPOSE_MI(strdup), + MI_INTERPOSE_MI(strndup) + }; +#elif defined(_MSC_VER) + // cannot override malloc unless using a dll. + // we just override new/delete which does work in a static library. +#else + // On all other systems forward to our API + void* malloc(size_t size) mi_attr_noexcept MI_FORWARD1(mi_malloc, size); + void* calloc(size_t size, size_t n) mi_attr_noexcept MI_FORWARD2(mi_calloc, size, n); + void* realloc(void* p, size_t newsize) mi_attr_noexcept MI_FORWARD2(mi_realloc, p, newsize); + void free(void* p) mi_attr_noexcept MI_FORWARD0(mi_free, p); +#endif + +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__) +#pragma GCC visibility push(default) +#endif + +// ------------------------------------------------------ +// Override new/delete +// This is not really necessary as they usually call +// malloc/free anyway, but it improves performance. +// ------------------------------------------------------ +#ifdef __cplusplus + // ------------------------------------------------------ + // With a C++ compiler we override the new/delete operators. + // see + // ------------------------------------------------------ + #include + void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p); + void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p); + + void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n); + void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n); + + void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } + void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } + + #if (__cplusplus >= 201402L || _MSC_VER >= 1916) + void operator delete (void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n); + void operator delete[](void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n); + #endif + + #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) + void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } + void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } + void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; + void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; + + void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } + void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } + void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast(al)); } + void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast(al)); } + #endif + +#elif (defined(__GNUC__) || defined(__clang__)) + // ------------------------------------------------------ + // Override by defining the mangled C++ names of the operators (as + // used by GCC and CLang). 
+ // See + // ------------------------------------------------------ + void _ZdlPv(void* p) MI_FORWARD0(mi_free,p); // delete + void _ZdaPv(void* p) MI_FORWARD0(mi_free,p); // delete[] + void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n); + void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n); + void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } + void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } + void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } + void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } + + typedef struct mi_nothrow_s { } mi_nothrow_t; + #if (MI_INTPTR_SIZE==8) + void* _Znwm(size_t n) MI_FORWARD1(mi_new,n); // new 64-bit + void* _Znam(size_t n) MI_FORWARD1(mi_new,n); // new[] 64-bit + void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al); + void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al); + void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + #elif (MI_INTPTR_SIZE==4) + void* _Znwj(size_t n) MI_FORWARD1(mi_new,n); // new 64-bit + void* _Znaj(size_t n) MI_FORWARD1(mi_new,n); // new[] 64-bit + void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al); + void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al); + void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + #else + #error "define overloads for new/delete for this platform (just for performance, can be skipped)" + #endif +#endif // __cplusplus + + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------------------------------------------ +// Posix & Unix functions definitions +// ------------------------------------------------------ + +void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize); +size_t malloc_size(void* p) MI_FORWARD1(mi_usable_size,p); +size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p); +void cfree(void* p) MI_FORWARD0(mi_free, p); + +// no forwarding here due to aliasing/name mangling issues +void* valloc(size_t size) { return mi_valloc(size); } +void* pvalloc(size_t size) { return mi_pvalloc(size); } +void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } +void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } +void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } +int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } + +#if defined(__GLIBC__) && defined(__linux__) + // forward __libc interface (needed for 
glibc-based Linux distributions) + void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size); + void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size); + void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size); + void __libc_free(void* p) MI_FORWARD0(mi_free,p); + void __libc_cfree(void* p) MI_FORWARD0(mi_free,p); + + void* __libc_valloc(size_t size) { return mi_valloc(size); } + void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); } + void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); } + int __posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p,alignment,size); } +#endif + +#ifdef __cplusplus +} +#endif + +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__) +#pragma GCC visibility pop +#endif + +#endif // MI_MALLOC_OVERRIDE && !_WIN32 +#endif diff --git a/runtime/src/mimalloc/c/alloc-posix.c b/runtime/src/mimalloc/c/alloc-posix.c new file mode 100644 index 00000000000..505e42e4489 --- /dev/null +++ b/runtime/src/mimalloc/c/alloc-posix.c @@ -0,0 +1,151 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018,2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// ------------------------------------------------------------------------ +// mi prefixed publi definitions of various Posix, Unix, and C++ functions +// for convenience and used when overriding these functions. +// ------------------------------------------------------------------------ + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +// ------------------------------------------------------ +// Posix & Unix functions definitions +// ------------------------------------------------------ + +#include +#include // memcpy +#include // getenv + +#ifndef EINVAL +#define EINVAL 22 +#endif +#ifndef ENOMEM +#define ENOMEM 12 +#endif + + +size_t mi_malloc_size(const void* p) mi_attr_noexcept { + return mi_usable_size(p); +} + +size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { + return mi_usable_size(p); +} + +void mi_cfree(void* p) mi_attr_noexcept { + if (mi_is_in_heap_region(p)) { + mi_free(p); + } +} + +int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept { + // Note: The spec dictates we should not modify `*p` on an error. 
(issue#27) + // + if (p == NULL) return EINVAL; + if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment + if (!_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 + void* q = mi_malloc_aligned(size, alignment); + if (q==NULL && size != 0) return ENOMEM; + *p = q; + return 0; +} + +void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { + return mi_malloc_aligned(size, alignment); +} + +void* mi_valloc(size_t size) mi_attr_noexcept { + return mi_malloc_aligned(size, _mi_os_page_size()); +} + +void* mi_pvalloc(size_t size) mi_attr_noexcept { + size_t psize = _mi_os_page_size(); + if (size >= SIZE_MAX - psize) return NULL; // overflow + size_t asize = ((size + psize - 1) / psize) * psize; + return mi_malloc_aligned(asize, psize); +} + +void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { + if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL; + if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see + return mi_malloc_aligned(size, alignment); +} + +void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD + void* newp = mi_reallocn(p,count,size); + if (newp==NULL) errno = ENOMEM; + return newp; +} + +void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft + void* res = mi_expand(p, newsize); + if (res == NULL) errno = ENOMEM; + return res; +} + +unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { + if (s==NULL) return NULL; + size_t len; + for(len = 0; s[len] != 0; len++) { } + size_t size = (len+1)*sizeof(unsigned short); + unsigned short* p = (unsigned short*)mi_malloc(size); + if (p != NULL) { + memcpy(p,s,size); + } + return p; +} + +unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { + return (unsigned char*)mi_strdup((const char*)s); +} + +int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { + if (buf==NULL || name==NULL) return EINVAL; + if (size != NULL) *size = 0; + #pragma warning(suppress:4996) + char* p = getenv(name); + if (p==NULL) { + *buf = NULL; + } + else { + *buf = mi_strdup(p); + if (*buf==NULL) return ENOMEM; + if (size != NULL) *size = strlen(p); + } + return 0; +} + +int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept { + if (buf==NULL || name==NULL) return EINVAL; + if (size != NULL) *size = 0; +#if !defined(_WIN32) || (defined(WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP)) + // not supported + *buf = NULL; + return EINVAL; +#else + #pragma warning(suppress:4996) + unsigned short* p = (unsigned short*)_wgetenv((const wchar_t*)name); + if (p==NULL) { + *buf = NULL; + } + else { + *buf = mi_wcsdup(p); + if (*buf==NULL) return ENOMEM; + if (size != NULL) *size = wcslen((const wchar_t*)p); + } + return 0; +#endif +} + +void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft + return mi_recalloc_aligned_at(p, newcount, size, alignment, offset); +} + +void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft + return mi_recalloc_aligned(p, newcount, size, alignment); +} diff --git a/runtime/src/mimalloc/c/alloc.c b/runtime/src/mimalloc/c/alloc.c new file mode 100644 index 00000000000..e68b48d2025 --- /dev/null +++ b/runtime/src/mimalloc/c/alloc.c @@ -0,0 +1,707 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, 
Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset, memcpy, strlen +#include // malloc, exit + +#define MI_IN_ALLOC_C +#include "alloc-override.c" +#undef MI_IN_ALLOC_C + +// ------------------------------------------------------ +// Allocation +// ------------------------------------------------------ + +// Fast allocation in a page: just pop from the free list. +// Fall back to generic allocation only if the list is empty. +extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + mi_assert_internal(page->block_size==0||page->block_size >= size); + mi_block_t* block = page->free; + if (mi_unlikely(block == NULL)) { + return _mi_malloc_generic(heap, size); // slow path + } + mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); + // pop from the free list + page->free = mi_block_next(page,block); + page->used++; + mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); +#if (MI_DEBUG!=0) + if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } +#elif (MI_SECURE!=0) + block->next = 0; // don't leak internal data +#endif +#if (MI_STAT>1) + if(size <= MI_LARGE_OBJ_SIZE_MAX) { + size_t bin = _mi_bin(size); + mi_heap_stat_increase(heap,normal[bin], 1); + } +#endif + return block; +} + +// allocate a small block +extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { + mi_assert(size <= MI_SMALL_SIZE_MAX); + mi_page_t* page = _mi_heap_get_free_small_page(heap,size); + return _mi_page_malloc(heap, page, size); +} + +extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept { + return mi_heap_malloc_small(mi_get_default_heap(), size); +} + + +// zero initialized small block +mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { + void* p = mi_malloc_small(size); + if (p != NULL) { memset(p, 0, size); } + return p; +} + +// The main allocation function +extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + mi_assert(heap!=NULL); + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + void* p; + if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { + p = mi_heap_malloc_small(heap, size); + } + else { + p = _mi_malloc_generic(heap, size); + } + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes + } + #endif + return p; +} + +extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { + return mi_heap_malloc(mi_get_default_heap(), size); +} + +void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { + // note: we need to initialize the whole block to zero, not just size + // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) + UNUSED(size); + mi_assert_internal(p != NULL); + mi_assert_internal(size > 0 && page->block_size >= size); + mi_assert_internal(_mi_ptr_page(p)==page); + if (page->is_zero) { + // already zero initialized memory? 
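+ // only the block's first word (the free-list link) can be stale here, so clearing it is enough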
+ ((mi_block_t*)p)->next = 0; // clear the free list pointer + mi_assert_expensive(mi_mem_is_zero(p,page->block_size)); + } + else { + // otherwise memset + memset(p, 0, page->block_size); + } +} + +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) { + void* p = mi_heap_malloc(heap,size); + if (zero && p != NULL) { + _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again? + } + return p; +} + +extern inline mi_decl_allocator void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return _mi_heap_malloc_zero(heap, size, true); +} + +mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { + return mi_heap_zalloc(mi_get_default_heap(),size); +} + + +// ------------------------------------------------------ +// Check for double free in secure and debug mode +// This is somewhat expensive so only enabled for secure mode 4 +// ------------------------------------------------------ + +#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) +// linear check if the free list contains a specific element +static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { + while (list != NULL) { + if (elem==list) return true; + list = mi_block_next(page, list); + } + return false; +} + +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { + size_t psize; + uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); + if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) { + // Suspicious: the decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + { + _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + return true; + } + } + return false; +} + +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? + { + // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free? 
+ // (continue in separate function to improve code generation) + return mi_check_is_double_freex(page, block, n); + } + return false; +} +#else +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + UNUSED(page); + UNUSED(block); + return false; +} +#endif + + +// ------------------------------------------------------ +// Free +// ------------------------------------------------------ + +// multi-threaded free +static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) +{ + mi_thread_free_t tfree; + mi_thread_free_t tfreex; + bool use_delayed; + + mi_segment_t* segment = _mi_page_segment(page); + if (segment->page_kind==MI_PAGE_HUGE) { + // huge page segments are always abandoned and can be freed immediately + mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL); + // claim it and free + mi_heap_t* heap = mi_get_default_heap(); + // paranoia: if this it the last reference, the cas should always succeed + if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) { + mi_block_set_next(page, block, page->free); + page->free = block; + page->used--; + page->is_zero = false; + mi_assert(page->used == 0); + mi_tld_t* tld = heap->tld; + if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&tld->stats.giant, page->block_size); + } + else { + _mi_stat_decrease(&tld->stats.huge, page->block_size); + } + _mi_segment_page_free(page,true,&tld->segments); + } + return; + } + + do { + tfree = page->thread_free; + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE || + (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page + ); + if (mi_unlikely(use_delayed)) { + // unlikely: this only happens on the first concurrent free in a page that is in the full list + tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); + } + else { + // usual: directly add to page thread_free list + mi_block_set_next(page, block, mi_tf_block(tfree)); + tfreex = mi_tf_set_block(tfree,block); + } + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + + if (mi_likely(!use_delayed)) { + // increment the thread free count and return + mi_atomic_increment(&page->thread_freed); + } + else { + // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) + mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); + mi_assert_internal(heap != NULL); + if (heap != NULL) { + // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) + mi_block_t* dfree; + do { + dfree = (mi_block_t*)heap->thread_delayed_free; + mi_block_set_nextx(heap,block,dfree, heap->cookie); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); + } + + // and reset the MI_DELAYED_FREEING flag + do { + tfreex = tfree = page->thread_free; + mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } +} + + +// regular free +static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) +{ + #if (MI_DEBUG) + memset(block, MI_DEBUG_FREED, page->block_size); + #endif + + // and push it on the free list + if (mi_likely(local)) { + // owning thread can free a block directly + if (mi_check_is_double_free(page, block)) return; + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + page->used--; + if (mi_unlikely(mi_page_all_free(page))) { + _mi_page_retire(page); + } + else if (mi_unlikely(mi_page_is_in_full(page))) { + _mi_page_unfull(page); + } + } + else { + _mi_free_block_mt(page,block); + } +} + + +// Adjust a block that was allocated aligned, to the actual start of the block in the page. +mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { + mi_assert_internal(page!=NULL && p!=NULL); + size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); + size_t adjust = (diff % page->block_size); + return (mi_block_t*)((uintptr_t)p - adjust); +} + + +static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) { + mi_block_t* block = (mi_page_has_aligned(page) ? 
_mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); + _mi_free_block(page, local, block); +} + +// Free a block +void mi_free(void* p) mi_attr_noexcept +{ +#if (MI_DEBUG>0) + if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { + _mi_error_message("trying to free an invalid (unaligned) pointer: %p\n", p); + return; + } +#endif + + const mi_segment_t* const segment = _mi_ptr_segment(p); + if (mi_unlikely(segment == NULL)) return; // checks for (p==NULL) + +#if (MI_DEBUG!=0) + if (mi_unlikely(!mi_is_in_heap_region(p))) { + _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: 0x%p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", p); + if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { + _mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p); + } + } +#endif +#if (MI_DEBUG!=0 || MI_SECURE>=4) + if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { + _mi_error_message("trying to free a pointer that does not point to a valid heap space: %p\n", p); + return; + } +#endif + + const uintptr_t tid = _mi_thread_id(); + mi_page_t* const page = _mi_segment_page_of(segment, p); + +#if (MI_STAT>1) + mi_heap_t* heap = mi_heap_get_default(); + mi_heap_stat_decrease(heap, malloc, mi_usable_size(p)); + if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1); + } + // huge page stat is accounted for in `_mi_page_retire` +#endif + + if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks + // local, and not full or aligned + mi_block_t* block = (mi_block_t*)p; + if (mi_check_is_double_free(page,block)) return; + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + page->used--; + if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } + } + else { + // non-local, aligned blocks, or a full page; use the more generic path + mi_free_generic(segment, page, tid == segment->thread_id, p); + } +} + +bool _mi_free_delayed_block(mi_block_t* block) { + // get segment and page + const mi_segment_t* segment = _mi_ptr_segment(block); + mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(_mi_thread_id() == segment->thread_id); + mi_page_t* page = _mi_segment_page_of(segment, block); + if (mi_tf_delayed(page->thread_free) == MI_DELAYED_FREEING) { + // we might already start delayed freeing while another thread has not yet + // reset the delayed_freeing flag; in that case don't free it quite yet if + // this is the last block remaining. + if (page->used - page->thread_freed == 1) return false; + } + _mi_free_block(page,true,block); + return true; +} + +// Bytes available in a block +size_t mi_usable_size(const void* p) mi_attr_noexcept { + if (p==NULL) return 0; + const mi_segment_t* segment = _mi_ptr_segment(p); + const mi_page_t* page = _mi_segment_page_of(segment,p); + size_t size = page->block_size; + if (mi_unlikely(mi_page_has_aligned(page))) { + ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); + mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); + return (size - adjust); + } + else { + return size; + } +} + + +// ------------------------------------------------------ +// ensure explicit external inline definitions are emitted! 
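+// (Descriptive note, inferred from the `_mi_externs` array below: when this
+// file is compiled as C++, the `extern inline` functions above are only
+// emitted out-of-line if they are odr-used; taking their addresses in a
+// dummy array forces the compiler to emit the definitions.)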
+// ------------------------------------------------------ + +#ifdef __cplusplus +void* _mi_externs[] = { + (void*)&_mi_page_malloc, + (void*)&mi_malloc, + (void*)&mi_malloc_small, + (void*)&mi_heap_malloc, + (void*)&mi_heap_zalloc, + (void*)&mi_heap_malloc_small +}; +#endif + + +// ------------------------------------------------------ +// Allocation extensions +// ------------------------------------------------------ + +void mi_free_size(void* p, size_t size) mi_attr_noexcept { + UNUSED_RELEASE(size); + mi_assert(p == NULL || size <= mi_usable_size(p)); + mi_free(p); +} + +void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { + UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free_size(p,size); +} + +void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { + UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free(p); +} + +extern inline mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_mul_overflow(count,size,&total)) return NULL; + return mi_heap_zalloc(heap,total); +} + +mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { + return mi_heap_calloc(mi_get_default_heap(),count,size); +} + +// Uninitialized `calloc` +extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_mul_overflow(count,size,&total)) return NULL; + return mi_heap_malloc(heap, total); +} + +mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { + return mi_heap_mallocn(mi_get_default_heap(),count,size); +} + +// Expand in place or fail +mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { + if (p == NULL) return NULL; + size_t size = mi_usable_size(p); + if (newsize > size) return NULL; + return p; // it fits +} + +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) { + if (p == NULL) return _mi_heap_malloc_zero(heap,newsize,zero); + size_t size = mi_usable_size(p); + if (newsize <= size && newsize >= (size / 2)) { + return p; // reallocation still fits and not more than 50% waste + } + void* newp = mi_heap_malloc(heap,newsize); + if (mi_likely(newp != NULL)) { + if (zero && newsize > size) { + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + memset((uint8_t*)newp + start, 0, newsize - start); + } + memcpy(newp, p, (newsize > size ? 
size : newsize)); + mi_free(p); // only free if successful + } + return newp; +} + +mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + return _mi_heap_realloc_zero(heap, p, newsize, false); +} + +mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_mul_overflow(count, size, &total)) return NULL; + return mi_heap_realloc(heap, p, total); +} + + +// Reallocate but free `p` on errors +mi_decl_allocator void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + void* newp = mi_heap_realloc(heap, p, newsize); + if (newp==NULL && p!=NULL) mi_free(p); + return newp; +} + +mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + return _mi_heap_realloc_zero(heap, p, newsize, true); +} + +mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_mul_overflow(count, size, &total)) return NULL; + return mi_heap_rezalloc(heap, p, total); +} + + +mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_realloc(mi_get_default_heap(),p,newsize); +} + +mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_heap_reallocn(mi_get_default_heap(),p,count,size); +} + +// Reallocate but free `p` on errors +mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_reallocf(mi_get_default_heap(),p,newsize); +} + +mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); +} + +mi_decl_allocator void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_heap_recalloc(mi_get_default_heap(), p, count, size); +} + + + +// ------------------------------------------------------ +// strdup, strndup, and realpath +// ------------------------------------------------------ + +// `strdup` using mi_malloc +char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { + if (s == NULL) return NULL; + size_t n = strlen(s); + char* t = (char*)mi_heap_malloc(heap,n+1); + if (t != NULL) memcpy(t, s, n + 1); + return t; +} + +char* mi_strdup(const char* s) mi_attr_noexcept { + return mi_heap_strdup(mi_get_default_heap(), s); +} + +// `strndup` using mi_malloc +char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { + if (s == NULL) return NULL; + size_t m = strlen(s); + if (n > m) n = m; + char* t = (char*)mi_heap_malloc(heap, n+1); + if (t == NULL) return NULL; + memcpy(t, s, n); + t[n] = 0; + return t; +} + +char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { + return mi_heap_strndup(mi_get_default_heap(),s,n); +} + +#ifndef __wasi__ +// `realpath` using mi_malloc +#ifdef _WIN32 +#ifndef PATH_MAX +#define PATH_MAX MAX_PATH +#endif +#include +#include +char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { + // todo: use GetFullPathNameW to allow longer file names + char buf[PATH_MAX]; + DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? 
buf : resolved_name), NULL); + if (res == 0) { + errno = GetLastError(); return NULL; + } + else if (res > PATH_MAX) { + errno = EINVAL; return NULL; + } + else if (resolved_name != NULL) { + return resolved_name; + } + else { + return mi_heap_strndup(heap, buf, PATH_MAX); + } +} +#else +#include // pathconf +static size_t mi_path_max() { + static size_t path_max = 0; + if (path_max <= 0) { + long m = pathconf("/",_PC_PATH_MAX); + if (m <= 0) path_max = 4096; // guess + else if (m < 256) path_max = 256; // at least 256 + else path_max = m; + } + return path_max; +} + +char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { + if (resolved_name != NULL) { + return realpath(fname,resolved_name); + } + else { + size_t n = mi_path_max(); + char* buf = (char*)mi_malloc(n+1); + if (buf==NULL) return NULL; + char* rname = realpath(fname,buf); + char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL` + mi_free(buf); + return result; + } +} +#endif + +char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { + return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); +} +#endif + +/*------------------------------------------------------- +C++ new and new_aligned +The standard requires calling into `get_new_handler` and +throwing the bad_alloc exception on failure. If we compile +with a C++ compiler we can implement this precisely. If we +use a C compiler we cannot throw a `bad_alloc` exception +but we call `exit` instead (i.e. not returning). +-------------------------------------------------------*/ + +#ifdef __cplusplus +#include +static bool mi_try_new_handler(bool nothrow) { + std::new_handler h = std::get_new_handler(); + if (h==NULL) { + if (!nothrow) throw std::bad_alloc(); + return false; + } + else { + h(); + return true; + } +} +#else +#include +#ifndef ENOMEM +#define ENOMEM 12 +#endif +typedef void (*std_new_handler_t)(); + +#if (defined(__GNUC__) || defined(__clang__)) +std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv() { + return NULL; +} +std_new_handler_t mi_get_new_handler() { + return _ZSt15get_new_handlerv(); +} +#else +// note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`. 
+std_new_handler_t mi_get_new_handler() { + return NULL; +} +#endif + +static bool mi_try_new_handler(bool nothrow) { + std_new_handler_t h = mi_get_new_handler(); + if (h==NULL) { + if (!nothrow) exit(ENOMEM); + return false; + } + else { + h(); + return true; + } +} +#endif + +static mi_decl_noinline void* mi_try_new(size_t n, bool nothrow ) { + void* p = NULL; + while(p == NULL && mi_try_new_handler(nothrow)) { + p = mi_malloc(n); + } + return p; +} + +void* mi_new(size_t n) { + void* p = mi_malloc(n); + if (mi_unlikely(p == NULL)) return mi_try_new(n,false); + return p; +} + +void* mi_new_aligned(size_t n, size_t alignment) { + void* p; + do { p = mi_malloc_aligned(n, alignment); } + while(p == NULL && mi_try_new_handler(false)); + return p; +} + +void* mi_new_nothrow(size_t n) { + void* p = mi_malloc(n); + if (mi_unlikely(p == NULL)) return mi_try_new(n,true); + return p; +} + +void* mi_new_aligned_nothrow(size_t n, size_t alignment) { + void* p; + do { p = mi_malloc_aligned(n, alignment); } + while (p == NULL && mi_try_new_handler(true)); + return p; +} diff --git a/runtime/src/mimalloc/c/heap.c b/runtime/src/mimalloc/c/heap.c new file mode 100644 index 00000000000..daa9b241c34 --- /dev/null +++ b/runtime/src/mimalloc/c/heap.c @@ -0,0 +1,527 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset, memcpy + + +/* ----------------------------------------------------------- + Helpers +----------------------------------------------------------- */ + +// return `true` if ok, `false` to break +typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2); + +// Visit all pages in a heap; returns `false` if break was called. 
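+// A minimal sketch of such a visitor, assuming only the typedef above (the
+// name `mi_heap_page_count_visitor` is illustrative, not part of mimalloc):
+//
+//   static bool mi_heap_page_count_visitor(mi_heap_t* heap, mi_page_queue_t* pq,
+//                                          mi_page_t* page, void* arg1, void* arg2) {
+//     UNUSED(heap); UNUSED(pq); UNUSED(page); UNUSED(arg2);
+//     (*(size_t*)arg1)++;   // tally one page
+//     return true;          // keep visiting
+//   }
+//
+//   // usage: size_t n = 0; mi_heap_visit_pages(heap, &mi_heap_page_count_visitor, &n, NULL);
+//
+// The generic walker that drives such visitors: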
+static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2) +{ + if (heap==NULL || heap->page_count==0) return 0; + + // visit all pages + #if MI_DEBUG>1 + size_t total = heap->page_count; + #endif + size_t count = 0; + for (size_t i = 0; i <= MI_BIN_FULL; i++) { + mi_page_queue_t* pq = &heap->pages[i]; + mi_page_t* page = pq->first; + while(page != NULL) { + mi_page_t* next = page->next; // save next in case the page gets removed from the queue + mi_assert_internal(page->heap == heap); + count++; + if (!fn(heap, pq, page, arg1, arg2)) return false; + page = next; // and continue + } + } + mi_assert_internal(count == total); + return true; +} + + +#if MI_DEBUG>1 +static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { + UNUSED(arg1); + UNUSED(arg2); + UNUSED(pq); + mi_assert_internal(page->heap == heap); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(segment->thread_id == heap->thread_id); + mi_assert_expensive(_mi_page_is_valid(page)); + return true; +} + +static bool mi_heap_is_valid(mi_heap_t* heap) { + mi_assert_internal(heap!=NULL); + mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL); + return true; +} +#endif + + + + +/* ----------------------------------------------------------- + "Collect" pages by migrating `local_free` and `thread_free` + lists and freeing empty pages. This is done when a thread + stops (and in that case abandons pages if there are still + blocks alive) +----------------------------------------------------------- */ + +typedef enum mi_collect_e { + NORMAL, + FORCE, + ABANDON +} mi_collect_t; + + +static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { + UNUSED(arg2); + UNUSED(heap); + mi_collect_t collect = *((mi_collect_t*)arg_collect); + _mi_page_free_collect(page, collect >= ABANDON); + if (mi_page_all_free(page)) { + // no more used blocks, free the page. TODO: should we retire here and be less aggressive? + _mi_page_free(page, pq, collect != NORMAL); + } + else if (collect == ABANDON) { + // still used blocks but the thread is done; abandon the page + _mi_page_abandon(page, pq); + } + return true; // don't break +} + +static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { + UNUSED(arg1); + UNUSED(arg2); + UNUSED(heap); + UNUSED(pq); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + return true; // don't break +} + +static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) +{ + if (!mi_heap_is_initialized(heap)) return; + _mi_deferred_free(heap, collect > NORMAL); + + // collect (some) abandoned pages + if (collect >= NORMAL && !heap->no_reclaim) { + if (collect == NORMAL) { + // this may free some segments (but also take ownership of abandoned pages) + _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); + } + #if MI_DEBUG + else if (collect == ABANDON && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + // the main thread is abandoned, try to free all abandoned segments. + // if all memory is freed by now, all segments should be freed. 
+ _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); + } + #endif + } + + // if abandoning, mark all pages to no longer add to delayed_free + if (collect == ABANDON) { + //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) { + // _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE + //} + mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); + } + + // free thread delayed blocks. + // (if abandoning, after this there are no more local references into the pages.) + _mi_heap_delayed_free(heap); + + // collect all pages owned by this thread + mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); + mi_assert_internal( collect != ABANDON || heap->thread_delayed_free == NULL ); + + // collect segment caches + if (collect >= FORCE) { + _mi_segment_thread_collect(&heap->tld->segments); + } + + // collect regions + if (collect >= FORCE && _mi_is_main_thread()) { + _mi_mem_collect(&heap->tld->stats); + } +} + +void _mi_heap_collect_abandon(mi_heap_t* heap) { + mi_heap_collect_ex(heap, ABANDON); +} + +void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { + mi_heap_collect_ex(heap, (force ? FORCE : NORMAL)); +} + +void mi_collect(bool force) mi_attr_noexcept { + mi_heap_collect(mi_get_default_heap(), force); +} + + +/* ----------------------------------------------------------- + Heap new +----------------------------------------------------------- */ + +mi_heap_t* mi_heap_get_default(void) { + mi_thread_init(); + return mi_get_default_heap(); +} + +mi_heap_t* mi_heap_get_backing(void) { + mi_heap_t* heap = mi_heap_get_default(); + mi_assert_internal(heap!=NULL); + mi_heap_t* bheap = heap->tld->heap_backing; + mi_assert_internal(bheap!=NULL); + mi_assert_internal(bheap->thread_id == _mi_thread_id()); + return bheap; +} + +uintptr_t _mi_heap_random(mi_heap_t* heap) { + uintptr_t r = heap->random; + heap->random = _mi_random_shuffle(r); + return r; +} + +mi_heap_t* mi_heap_new(void) { + mi_heap_t* bheap = mi_heap_get_backing(); + mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); + if (heap==NULL) return NULL; + memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); + heap->tld = bheap->tld; + heap->thread_id = _mi_thread_id(); + heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1; + heap->random = _mi_heap_random(bheap); + heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe + return heap; +} + +// zero out the page queues +static void mi_heap_reset_pages(mi_heap_t* heap) { + mi_assert_internal(mi_heap_is_initialized(heap)); + // TODO: copy full empty heap instead? + memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); +#ifdef MI_MEDIUM_DIRECT + memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium)); +#endif + memcpy(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages)); + heap->thread_delayed_free = NULL; + heap->page_count = 0; +} + +// called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources. 
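+// For context, a commented usage sketch of the two tear-down paths (the local
+// names below are illustrative): `mi_heap_delete` migrates any live pages to
+// the backing heap, while `mi_heap_destroy` frees every page of the heap.
+//
+//   mi_heap_t* h = mi_heap_new();
+//   void* p = mi_heap_malloc(h, 128);
+//   mi_heap_delete(h);       // `p` stays valid; its page moves to the backing heap
+//   // ...or instead, when no block from `h` is referenced anymore:
+//   // mi_heap_destroy(h);   // frees all pages of `h`, invalidating `p`
+//
+// Freeing the heap structure itself: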
+static void mi_heap_free(mi_heap_t* heap) { + mi_assert_internal(mi_heap_is_initialized(heap)); + if (mi_heap_is_backing(heap)) return; // dont free the backing heap + + // reset default + if (mi_heap_is_default(heap)) { + _mi_heap_set_default_direct(heap->tld->heap_backing); + } + // and free the used memory + mi_free(heap); +} + + +/* ----------------------------------------------------------- + Heap destroy +----------------------------------------------------------- */ + +static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { + UNUSED(arg1); + UNUSED(arg2); + UNUSED(heap); + UNUSED(pq); + + // ensure no more thread_delayed_free will be added + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + + // stats + if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { + if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + } + else { + _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); + } + } + #if (MI_STAT>1) + size_t inuse = page->used - page->thread_freed; + if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); + } + mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... + #endif + + // pretend it is all free now + mi_assert_internal(page->thread_freed<=0xFFFF); + page->used = (uint16_t)page->thread_freed; + + // and free the page + _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); + + return true; // keep going +} + +void _mi_heap_destroy_pages(mi_heap_t* heap) { + mi_heap_visit_pages(heap, &_mi_heap_page_destroy, NULL, NULL); + mi_heap_reset_pages(heap); +} + +void mi_heap_destroy(mi_heap_t* heap) { + mi_assert(mi_heap_is_initialized(heap)); + mi_assert(heap->no_reclaim); + mi_assert_expensive(mi_heap_is_valid(heap)); + if (!mi_heap_is_initialized(heap)) return; + if (!heap->no_reclaim) { + // don't free in case it may contain reclaimed pages + mi_heap_delete(heap); + } + else { + // free all pages + _mi_heap_destroy_pages(heap); + mi_heap_free(heap); + } +} + + + +/* ----------------------------------------------------------- + Safe Heap delete +----------------------------------------------------------- */ + +// Tranfer the pages from one heap to the other +static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { + mi_assert_internal(heap!=NULL); + if (from==NULL || from->page_count == 0) return; + + // unfull all full pages in the `from` heap + mi_page_t* page = from->pages[MI_BIN_FULL].first; + while (page != NULL) { + mi_page_t* next = page->next; + _mi_page_unfull(page); + page = next; + } + mi_assert_internal(from->pages[MI_BIN_FULL].first == NULL); + + // free outstanding thread delayed free blocks + _mi_heap_delayed_free(from); + + // transfer all pages by appending the queues; this will set + // a new heap field which is ok as all pages are unfull'd and thus + // other threads won't access this field anymore (see `mi_free_block_mt`) + for (size_t i = 0; i < MI_BIN_FULL; i++) { + mi_page_queue_t* pq = &heap->pages[i]; + mi_page_queue_t* append = &from->pages[i]; + size_t pcount = _mi_page_queue_append(heap, pq, append); + heap->page_count += pcount; + from->page_count -= pcount; + } + mi_assert_internal(from->thread_delayed_free == NULL); + mi_assert_internal(from->page_count == 0); + + // and reset the `from` heap + mi_heap_reset_pages(from); +} + +// Safe delete a heap without freeing any still allocated blocks in 
that heap. +void mi_heap_delete(mi_heap_t* heap) +{ + mi_assert(mi_heap_is_initialized(heap)); + mi_assert_expensive(mi_heap_is_valid(heap)); + if (!mi_heap_is_initialized(heap)) return; + + if (!mi_heap_is_backing(heap)) { + // tranfer still used pages to the backing heap + mi_heap_absorb(heap->tld->heap_backing, heap); + } + else { + // the backing heap abandons its pages + _mi_heap_collect_abandon(heap); + } + mi_assert_internal(heap->page_count==0); + mi_heap_free(heap); +} + +mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { + mi_assert(mi_heap_is_initialized(heap)); + if (!mi_heap_is_initialized(heap)) return NULL; + mi_assert_expensive(mi_heap_is_valid(heap)); + mi_heap_t* old = mi_get_default_heap(); + _mi_heap_set_default_direct(heap); + return old; +} + + + + +/* ----------------------------------------------------------- + Analysis +----------------------------------------------------------- */ + +// static since it is not thread safe to access heaps from other threads. +static mi_heap_t* mi_heap_of_block(const void* p) { + if (p == NULL) return NULL; + mi_segment_t* segment = _mi_ptr_segment(p); + bool valid = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(valid); + if (mi_unlikely(!valid)) return NULL; + return _mi_segment_page_of(segment,p)->heap; +} + +bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { + mi_assert(heap != NULL); + if (!mi_heap_is_initialized(heap)) return false; + return (heap == mi_heap_of_block(p)); +} + + +static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) { + UNUSED(heap); + UNUSED(pq); + bool* found = (bool*)vfound; + mi_segment_t* segment = _mi_page_segment(page); + void* start = _mi_page_start(segment, page, NULL); + void* end = (uint8_t*)start + (page->capacity * page->block_size); + *found = (p >= start && p < end); + return (!*found); // continue if not found +} + +bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { + mi_assert(heap != NULL); + if (!mi_heap_is_initialized(heap)) return false; + if (((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) return false; // only aligned pointers + bool found = false; + mi_heap_visit_pages(heap, &mi_heap_page_check_owned, (void*)p, &found); + return found; +} + +bool mi_check_owned(const void* p) { + return mi_heap_check_owned(mi_get_default_heap(), p); +} + +/* ----------------------------------------------------------- + Visit all heap blocks and areas + Todo: enable visiting abandoned pages, and + enable visiting all blocks of all heaps across threads +----------------------------------------------------------- */ + +// Separate struct to keep `mi_page_t` out of the public interface +typedef struct mi_heap_area_ex_s { + mi_heap_area_t area; + mi_page_t* page; +} mi_heap_area_ex_t; + +static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) { + mi_assert(xarea != NULL); + if (xarea==NULL) return true; + const mi_heap_area_t* area = &xarea->area; + mi_page_t* page = xarea->page; + mi_assert(page != NULL); + if (page == NULL) return true; + + _mi_page_free_collect(page,true); + mi_assert_internal(page->local_free == NULL); + if (page->used == 0) return true; + + size_t psize; + uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); + + if (page->capacity == 1) { + // optimize page with one block + mi_assert_internal(page->used == 1 && page->free == NULL); + return visitor(page->heap, area, pstart, page->block_size, arg); + } + + // create a 
bitmap of free blocks. + #define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*)) + uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; + memset(free_map, 0, sizeof(free_map)); + + size_t free_count = 0; + for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { + free_count++; + mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); + size_t offset = (uint8_t*)block - pstart; + mi_assert_internal(offset % page->block_size == 0); + size_t blockidx = offset / page->block_size; // Todo: avoid division? + mi_assert_internal( blockidx < MI_MAX_BLOCKS); + size_t bitidx = (blockidx / sizeof(uintptr_t)); + size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); + free_map[bitidx] |= ((uintptr_t)1 << bit); + } + mi_assert_internal(page->capacity == (free_count + page->used)); + + // walk through all blocks skipping the free ones + size_t used_count = 0; + for (size_t i = 0; i < page->capacity; i++) { + size_t bitidx = (i / sizeof(uintptr_t)); + size_t bit = i - (bitidx * sizeof(uintptr_t)); + uintptr_t m = free_map[bitidx]; + if (bit == 0 && m == UINTPTR_MAX) { + i += (sizeof(uintptr_t) - 1); // skip a run of free blocks + } + else if ((m & ((uintptr_t)1 << bit)) == 0) { + used_count++; + uint8_t* block = pstart + (i * page->block_size); + if (!visitor(page->heap, area, block, page->block_size, arg)) return false; + } + } + mi_assert_internal(page->used == used_count); + return true; +} + +typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg); + + +static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { + UNUSED(heap); + UNUSED(pq); + mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; + mi_heap_area_ex_t xarea; + xarea.page = page; + xarea.area.reserved = page->reserved * page->block_size; + xarea.area.committed = page->capacity * page->block_size; + xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); + xarea.area.used = page->used - page->thread_freed; // race is ok + xarea.area.block_size = page->block_size; + return fun(heap, &xarea, arg); +} + +// Visit all heap pages as areas +static bool mi_heap_visit_areas(const mi_heap_t* heap, mi_heap_area_visit_fun* visitor, void* arg) { + if (visitor == NULL) return false; + return mi_heap_visit_pages((mi_heap_t*)heap, &mi_heap_visit_areas_page, (void*)(visitor), arg); // note: function pointer to void* :-{ +} + +// Just to pass arguments +typedef struct mi_visit_blocks_args_s { + bool visit_blocks; + mi_block_visit_fun* visitor; + void* arg; +} mi_visit_blocks_args_t; + +static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* xarea, void* arg) { + mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg; + if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false; + if (args->visit_blocks) { + return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg); + } + else { + return true; + } +} + +// Visit all blocks in a heap +bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; + return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); +} + diff --git a/runtime/src/mimalloc/c/include/mimalloc-atomic.h b/runtime/src/mimalloc/c/include/mimalloc-atomic.h new file mode 100644 index 00000000000..56c1320170b --- /dev/null +++ 
b/runtime/src/mimalloc/c/include/mimalloc-atomic.h @@ -0,0 +1,248 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_ATOMIC_H +#define MIMALLOC_ATOMIC_H + +// ------------------------------------------------------ +// Atomics +// We need to be portable between C, C++, and MSVC. +// ------------------------------------------------------ + +#if defined(_MSC_VER) +#define _Atomic(tp) tp +#define ATOMIC_VAR_INIT(x) x +#elif defined(__cplusplus) +#include +#define _Atomic(tp) std::atomic +#else +#include +#endif + +#define mi_atomic_cast(tp,x) (volatile _Atomic(tp)*)(x) + +// ------------------------------------------------------ +// Atomic operations specialized for mimalloc +// ------------------------------------------------------ + +// Atomically add a 64-bit value; returns the previous value. +// Note: not using _Atomic(int64_t) as it is only used for statistics. +static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); + +// Atomically add a value; returns the previous value. Memory ordering is relaxed. +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); + +// Atomically compare and exchange a value; returns `true` if successful. +// May fail spuriously. Memory ordering as release on success, and relaxed on failure. +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); + +// Atomically compare and exchange a value; returns `true` if successful. +// Memory ordering is acquire-release +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); + +// Atomically exchange a value. Memory ordering is acquire-release. +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange); + +// Atomically read a value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p); + +// Atomically read a value. Memory ordering is acquire. +static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p); + +// Atomically write a value. Memory ordering is release. +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); + +// Yield +static inline void mi_atomic_yield(void); + + + +// Atomically add a value; returns the previous value. +static inline uintptr_t mi_atomic_addu(volatile _Atomic(uintptr_t)* p, uintptr_t add) { + return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, (intptr_t)add); +} +// Atomically subtract a value; returns the previous value. +static inline uintptr_t mi_atomic_subu(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { + return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, -((intptr_t)sub)); +} + +// Atomically increment a value; returns the incremented result. +static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { + return mi_atomic_addu(p, 1); +} + +// Atomically decrement a value; returns the decremented result. 
+static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { + return mi_atomic_subu(p, 1); +} + +// Atomically read a pointer; Memory order is relaxed. +static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) { + return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p); +} + +// Atomically read a pointer; Memory order is acquire. +static inline void* mi_atomic_read_ptr(volatile _Atomic(void*) const * p) { + return (void*)mi_atomic_read((const volatile _Atomic(uintptr_t)*)p); +} + +// Atomically write a pointer +static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) { + mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x ); +} + +// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_ptr_weak(volatile _Atomic(void*)* p, void* desired, void* expected) { + return mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); +} + +// Atomically compare and exchange a pointer; returns `true` if successful. +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_ptr_strong(volatile _Atomic(void*)* p, void* desired, void* expected) { + return mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); +} + +// Atomically exchange a pointer value. +static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exchange) { + return (void*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)p, (uintptr_t)exchange); +} + + +#ifdef _MSC_VER +#define WIN32_LEAN_AND_MEAN +#include +#include +#ifdef _WIN64 +typedef LONG64 msc_intptr_t; +#define RC64(f) f##64 +#else +typedef LONG msc_intptr_t; +#define RC64(f) f +#endif +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { + return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +} +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { + return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); +} +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { + return mi_atomic_cas_strong(p,desired,expected); +} +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { + return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); +} +static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { + return *p; +} +static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { + return mi_atomic_read(p); +} +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + mi_atomic_exchange(p,x); +} +static inline void mi_atomic_yield(void) { + YieldProcessor(); +} +static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { + #ifdef _WIN64 + mi_atomic_add(p,add); + #else + int64_t current; + int64_t sum; + do { + current = *p; + sum = current + add; + } while (_InterlockedCompareExchange64(p, sum, current) != current); + #endif +} + +#else +#ifdef __cplusplus +#define MI_USING_STD using namespace std; +#else +#define MI_USING_STD +#endif +static inline void 
mi_atomic_add64(volatile int64_t* p, int64_t add) { + MI_USING_STD + atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); +} +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { + MI_USING_STD + return atomic_fetch_add_explicit(p, add, memory_order_relaxed); +} +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { + MI_USING_STD + return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed); +} +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { + MI_USING_STD + return atomic_compare_exchange_strong_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_relaxed); +} +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { + MI_USING_STD + return atomic_exchange_explicit(p, exchange, memory_order_acq_rel); +} +static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p) { + MI_USING_STD + return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed); +} +static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) { + MI_USING_STD + return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire); +} +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_store_explicit(p, x, memory_order_release); +} + +#if defined(__cplusplus) + #include + static inline void mi_atomic_yield(void) { + std::this_thread::yield(); + } +#elif (defined(__GNUC__) || defined(__clang__)) && \ + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)) +#if defined(__x86_64__) || defined(__i386__) + static inline void mi_atomic_yield(void) { + asm volatile ("pause" ::: "memory"); + } +#elif defined(__arm__) || defined(__aarch64__) + #if KONAN_MI_MALLOC + #if defined(__arm__) + #include + static inline void mi_atomic_yield(void) { + sched_yield(); + } + #else + static inline void mi_atomic_yield(void) { + asm volatile("yield"); + } + #endif + #else + static inline void mi_atomic_yield(void) { + asm volatile("yield"); + } + #endif +#endif +#elif defined(__wasi__) + #include + static inline void mi_atomic_yield(void) { + sched_yield(); + } +#else + #include + static inline void mi_atomic_yield(void) { + sleep(0); + } +#endif + +#endif + +#endif // __MIMALLOC_ATOMIC_H diff --git a/runtime/src/mimalloc/c/include/mimalloc-internal.h b/runtime/src/mimalloc/c/include/mimalloc-internal.h new file mode 100644 index 00000000000..b0997d1e3bd --- /dev/null +++ b/runtime/src/mimalloc/c/include/mimalloc-internal.h @@ -0,0 +1,498 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_INTERNAL_H +#define MIMALLOC_INTERNAL_H + +#include "mimalloc-types.h" + +#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__)) +#define MI_TLS_RECURSE_GUARD +#endif + +#if (MI_DEBUG>0) +#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) +#else +#define mi_trace_message(...) 
+#endif + +#if defined(_MSC_VER) +#define mi_decl_noinline __declspec(noinline) +#define mi_attr_noreturn +#elif defined(__GNUC__) || defined(__clang__) +#define mi_decl_noinline __attribute__((noinline)) +#define mi_attr_noreturn __attribute__((noreturn)) +#else +#define mi_decl_noinline +#define mi_attr_noreturn +#endif + + +// "options.c" +void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* out, const char* fmt, ...); +void _mi_error_message(const char* fmt, ...); +void _mi_warning_message(const char* fmt, ...); +void _mi_verbose_message(const char* fmt, ...); +void _mi_trace_message(const char* fmt, ...); +void _mi_options_init(void); +void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; + +// "init.c" +extern mi_stats_t _mi_stats_main; +extern const mi_page_t _mi_page_empty; +bool _mi_is_main_thread(void); +uintptr_t _mi_random_shuffle(uintptr_t x); +uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); +bool _mi_preloading(); // true while the C runtime is not ready + +// os.c +size_t _mi_os_page_size(void); +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data +size_t _mi_os_good_alloc_size(size_t size); + +// memory.c +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); + +bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_mem_protect(void* addr, size_t size); +bool _mi_mem_unprotect(void* addr, size_t size); + +void _mi_mem_collect(mi_stats_t* stats); + +// "segment.c" +mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); +bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); +void _mi_segment_thread_collect(mi_segments_tld_t* tld); +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page + +// "page.c" +void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; + +void _mi_page_retire(mi_page_t* page); // free the page if there are no other pages with many free blocks +void _mi_page_unfull(mi_page_t* page); +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... 
+void _mi_heap_delayed_free(mi_heap_t* heap); + +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); +void _mi_deferred_free(mi_heap_t* heap, bool force); + +void _mi_page_free_collect(mi_page_t* page,bool force); +void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments + +size_t _mi_bin_size(uint8_t bin); // for stats +uint8_t _mi_bin(size_t size); // for stats +uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD in "os.c" + +// "heap.c" +void _mi_heap_destroy_pages(mi_heap_t* heap); +void _mi_heap_collect_abandon(mi_heap_t* heap); +uintptr_t _mi_heap_random(mi_heap_t* heap); +void _mi_heap_set_default_direct(mi_heap_t* heap); + +// "stats.c" +void _mi_stats_done(mi_stats_t* stats); +double _mi_clock_end(double start); +double _mi_clock_start(void); + +// "alloc.c" +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero); +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero); +mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); +bool _mi_free_delayed_block(mi_block_t* block); +void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size); + +#if MI_DEBUG>1 +bool _mi_page_is_valid(mi_page_t* page); +#endif + + +// ------------------------------------------------------ +// Branches +// ------------------------------------------------------ + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) __builtin_expect((x),0) +#define mi_likely(x) __builtin_expect((x),1) +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + + + +/* ----------------------------------------------------------- + Inlined definitions +----------------------------------------------------------- */ +#define UNUSED(x) (void)(x) +#if (MI_DEBUG>0) +#define UNUSED_RELEASE(x) +#else +#define UNUSED_RELEASE(x) UNUSED(x) +#endif + +#define MI_INIT4(x) x(),x(),x(),x() +#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x) +#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x) +#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x) +#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) +#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) +#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) + + +// Overflow detecting multiply +#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { +#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 +#include // UINT_MAX, ULONG_MAX +#if (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, total); +#elif (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, total); +#else + return __builtin_umulll_overflow(count, size, total); +#endif +#else /* __builtin_umul_overflow is unavailable */ + *total = count * size; + return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) + && size > 0 && (SIZE_MAX / size) < count); +#endif +} + +// Is `x` a power of two? 
(0 is considered a power of two) +static inline bool _mi_is_power_of_two(uintptr_t x) { + return ((x & (x - 1)) == 0); +} + +// Align upwards +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return ((sz + mask) & ~mask); + } + else { + return (((sz + mask)/alignment)*alignment); + } +} + +// Is memory zero initialized? +static inline bool mi_mem_is_zero(void* p, size_t size) { + for (size_t i = 0; i < size; i++) { + if (((uint8_t*)p)[i] != 0) return false; + } + return true; +} + +// Align a byte size to a size in _machine words_, +// i.e. byte size == `wsize*sizeof(void*)`. +static inline size_t _mi_wsize_from_size(size_t size) { + mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t)); + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + + +/* ----------------------------------------------------------- + The thread local default heap +----------------------------------------------------------- */ + +extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap +extern mi_heap_t _mi_heap_main; // statically allocated main backing heap +extern bool _mi_process_is_initialized; + +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from + +static inline mi_heap_t* mi_get_default_heap(void) { +#ifdef MI_TLS_RECURSE_GUARD + // on some platforms, like macOS, the dynamic loader calls `malloc` + // to initialize thread local data. To avoid recursion, we need to avoid + // accessing the thread local `_mi_default_heap` until our module is loaded + // and use the statically allocated main heap until that time. + // TODO: patch ourselves dynamically to avoid this check every time? 
+ if (!_mi_process_is_initialized) return &_mi_heap_main; +#endif + return _mi_heap_default; +} + +static inline bool mi_heap_is_default(const mi_heap_t* heap) { + return (heap == mi_get_default_heap()); +} + +static inline bool mi_heap_is_backing(const mi_heap_t* heap) { + return (heap->tld->heap_backing == heap); +} + +static inline bool mi_heap_is_initialized(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + return (heap != &_mi_heap_empty); +} + +static inline uintptr_t _mi_ptr_cookie(const void* p) { + return ((uintptr_t)p ^ _mi_heap_main.cookie); +} + +/* ----------------------------------------------------------- + Pages +----------------------------------------------------------- */ + +static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { + mi_assert_internal(size <= MI_SMALL_SIZE_MAX); + return heap->pages_free_direct[_mi_wsize_from_size(size)]; +} + +// Get the page belonging to a certain size class +static inline mi_page_t* _mi_get_free_small_page(size_t size) { + return _mi_heap_get_free_small_page(mi_get_default_heap(), size); +} + +// Segment that contains the pointer +static inline mi_segment_t* _mi_ptr_segment(const void* p) { + // mi_assert_internal(p != NULL); + return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); +} + +// Segment belonging to a page +static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]); + return segment; +} + +// used internally +static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { + // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages + ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; + mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); + uintptr_t idx = (uintptr_t)diff >> segment->page_shift; + mi_assert_internal(idx < segment->capacity); + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); + return idx; +} + +// Get the page containing the pointer +static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + uintptr_t idx = _mi_segment_page_idx_of(segment, p); + return &((mi_segment_t*)segment)->pages[idx]; +} + +// Quick page start for initialized pages +static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { + return _mi_segment_page_start(segment, page, page->block_size, page_size); +} + +// Get the page containing the pointer +static inline mi_page_t* _mi_ptr_page(void* p) { + return _mi_segment_page_of(_mi_ptr_segment(p), p); +} + +// Thread free access +static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { + return (mi_block_t*)(tf & ~0x03); +} +static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) { + return (mi_delayed_t)(tf & 0x03); +} +static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) { + return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed); +} +static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) { + return mi_tf_make(mi_tf_block(tf),delayed); +} +static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) { + return mi_tf_make(block, mi_tf_delayed(tf)); +} + +// are all blocks in a page freed? 
+static inline bool mi_page_all_free(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->used - page->thread_freed == 0); +} + +// are there immediately available blocks +static inline bool mi_page_immediate_available(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->free != NULL); +} +// are there free blocks in this page? +static inline bool mi_page_has_free(mi_page_t* page) { + mi_assert_internal(page != NULL); + bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL)); + mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity); + return hasfree; +} + +// are all blocks in use? +static inline bool mi_page_all_used(mi_page_t* page) { + mi_assert_internal(page != NULL); + return !mi_page_has_free(page); +} + +// is more than 7/8th of a page in use? +static inline bool mi_page_mostly_used(const mi_page_t* page) { + if (page==NULL) return true; + uint16_t frac = page->reserved / 8U; + return (page->reserved - page->used + page->thread_freed <= frac); +} + +static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { + return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; +} + + + +//----------------------------------------------------------- +// Page flags +//----------------------------------------------------------- +static inline bool mi_page_is_in_full(const mi_page_t* page) { + return page->flags.x.in_full; +} + +static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { + page->flags.x.in_full = in_full; +} + +static inline bool mi_page_has_aligned(const mi_page_t* page) { + return page->flags.x.has_aligned; +} + +static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { + page->flags.x.has_aligned = has_aligned; +} + + +// ------------------------------------------------------------------- +// Encoding/Decoding the free list next pointers +// Note: we pass a `null` value to be used as the `NULL` value for the +// end of a free list. This is to prevent the cookie itself to ever +// be present among user blocks (as `cookie^0==cookie`). 
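The `mi_block_set_nextx`/`mi_block_nextx` helpers that follow implement this scheme: the stored word is `next ^ cookie`, and the page's `null` sentinel stands in for NULL so the raw cookie value never appears among user blocks. A minimal standalone sketch of the idea (illustrative names only, not the patch's code):

// Standalone sketch of the XOR free-list encoding described above.
// The names (block_t, set_next, get_next) are illustrative only.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct block_s { uintptr_t next; } block_t;

// Store `next ^ cookie`; a designated `null` sentinel replaces NULL so the
// raw cookie is never written to memory (cookie ^ 0 == cookie).
static void set_next(const void* null, block_t* b, const block_t* next, uintptr_t cookie) {
  if (next == NULL) next = (const block_t*)null;
  b->next = (uintptr_t)next ^ cookie;
}

static block_t* get_next(const void* null, const block_t* b, uintptr_t cookie) {
  block_t* n = (block_t*)(b->next ^ cookie);
  return ((void*)n == null) ? NULL : n;
}

int main(void) {
  uintptr_t cookie = 0x5bd1e995u;   // illustrative random cookie
  block_t sentinel;                 // plays the role of the per-page `null` value
  block_t a, b;

  set_next(&sentinel, &a, &b, cookie);    // a -> b
  set_next(&sentinel, &b, NULL, cookie);  // b -> end of list

  printf("a.next decodes to &b: %d\n", get_next(&sentinel, &a, cookie) == &b);
  printf("b.next decodes to NULL: %d\n", get_next(&sentinel, &b, cookie) == NULL);
  printf("stored word differs from raw pointer: %d\n", a.next != (uintptr_t)&b);
  return 0;
}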
+// ------------------------------------------------------------------- + +static inline bool mi_is_in_same_segment(const void* p, const void* q) { + return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); +} + +static inline bool mi_is_in_same_page(const void* p, const void* q) { + mi_segment_t* segmentp = _mi_ptr_segment(p); + mi_segment_t* segmentq = _mi_ptr_segment(q); + if (segmentp != segmentq) return false; + uintptr_t idxp = _mi_segment_page_idx_of(segmentp, p); + uintptr_t idxq = _mi_segment_page_idx_of(segmentq, q); + return (idxp == idxq); +} + +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) { + #ifdef MI_ENCODE_FREELIST + mi_block_t* b = (mi_block_t*)(block->next ^ cookie); + if (mi_unlikely((void*)b==null)) { b = NULL; } + return b; + #else + UNUSED(cookie); UNUSED(null); + return (mi_block_t*)block->next; + #endif +} + +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) { + #ifdef MI_ENCODE_FREELIST + if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } + block->next = (mi_encoded_t)next ^ cookie; + #else + UNUSED(cookie); UNUSED(null); + block->next = (mi_encoded_t)next; + #endif +} + +static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { + #ifdef MI_ENCODE_FREELIST + mi_block_t* next = mi_block_nextx(page,block,page->cookie); + // check for free list corruption: is `next` at least in our segment range? + // TODO: check if `next` is `page->block_size` aligned? + if (next!=NULL && !mi_is_in_same_page(block, next)) { + _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); + next = NULL; + } + return next; + #else + UNUSED(page); + return mi_block_nextx(page,block,0); + #endif +} + +static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { + #ifdef MI_ENCODE_FREELIST + mi_block_set_nextx(page,block,next, page->cookie); + #else + UNUSED(page); + mi_block_set_nextx(page,block, next,0); + #endif +} + +// ------------------------------------------------------------------- +// Getting the thread id should be performant +// as it is called in the fast path of `_mi_free`, +// so we specialize for various platforms. +// ------------------------------------------------------------------- +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} +#elif (defined(__GNUC__) || defined(__clang__)) && \ + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)) +#if KONAN_MI_MALLOC + #include <pthread.h> + pthread_t pthread_self(void); +#endif +// TLS register on x86 is in the FS or GS register +// see: https://akkadia.org/drepper/tls.pdf +static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { + uintptr_t tid; + #if defined(__i386__) + __asm__("movl %%gs:0, %0" : "=r" (tid) : : ); // 32-bit always uses GS + #elif defined(__MACH__) + #if KONAN_MI_MALLOC + #include <TargetConditionals.h> + #if TARGET_OS_EMBEDDED // iOS/tvOS/watchOS devices.
+ tid = pthread_self(); + #else + __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); // x86_64 macOS uses GS + #endif + #else + __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); // x86_64 macOS uses GS + #endif + #elif defined(__x86_64__) + __asm__("movq %%fs:0, %0" : "=r" (tid) : : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + asm volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid)); + #elif defined(__aarch64__) + asm volatile ("mrs %0, tpidr_el0" : "=r" (tid)); + #endif + return tid; +} +#else +// otherwise use standard C +static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&_mi_heap_default; +} +#endif + + +#endif diff --git a/runtime/src/mimalloc/c/include/mimalloc-new-delete.h b/runtime/src/mimalloc/c/include/mimalloc-new-delete.h new file mode 100644 index 00000000000..050f94334eb --- /dev/null +++ b/runtime/src/mimalloc/c/include/mimalloc-new-delete.h @@ -0,0 +1,52 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018,2019 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_NEW_DELETE_H +#define MIMALLOC_NEW_DELETE_H + +// ---------------------------------------------------------------------------- +// This header provides convenient overrides for the new and +// delete operations in C++. +// +// This header should be included in only one source file! +// +// On Windows, or when linking dynamically with mimalloc, these +// can be more performant than the standard new-delete operations. 
+// See +// --------------------------------------------------------------------------- +#if defined(__cplusplus) + #include <new> + #include <mimalloc.h> + + void operator delete(void* p) noexcept { mi_free(p); }; + void operator delete[](void* p) noexcept { mi_free(p); }; + + void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } + void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } + + void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + + #if (__cplusplus >= 201402L || _MSC_VER >= 1916) + void operator delete (void* p, std::size_t n) { mi_free_size(p,n); }; + void operator delete[](void* p, std::size_t n) { mi_free_size(p,n); }; + #endif + + #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) + void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } + void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } + void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }; + void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }; + + void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } + void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } + void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); } + void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); } + #endif +#endif + +#endif // MIMALLOC_NEW_DELETE_H diff --git a/runtime/src/mimalloc/c/include/mimalloc-override.h b/runtime/src/mimalloc/c/include/mimalloc-override.h new file mode 100644 index 00000000000..201fb8b49ba --- /dev/null +++ b/runtime/src/mimalloc/c/include/mimalloc-override.h @@ -0,0 +1,66 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018,2019 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_OVERRIDE_H +#define MIMALLOC_OVERRIDE_H + +/* ---------------------------------------------------------------------------- +This header can be used to statically redirect malloc/free and new/delete +to the mimalloc variants. This can be useful if one can include this file on +each source file in a project (but be careful when using external code to +not accidentally mix pointers from different allocators).
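As a usage sketch (not part of this patch), a C translation unit can opt in to this static redirection by including the override header after the system headers it needs; this assumes `runtime/src/mimalloc/c/include` is on the include path and the mimalloc objects are linked into the final binary:

// Sketch: route plain malloc/free through mimalloc via the override macros.
#include <stdio.h>
#include <string.h>
#include <mimalloc-override.h>   // after this point, malloc/free/realloc expand to mi_* calls

int main(void) {
  char* s = malloc(32);          // expands to mi_malloc(32)
  if (s == NULL) return 1;
  strcpy(s, "allocated by mimalloc");
  printf("%s (usable size: %zu)\n", s, malloc_usable_size(s));  // expands to mi_usable_size(s)
  free(s);                       // expands to mi_free(s)
  return 0;
}

Because the redirection is purely macro-based, every translation unit that allocates or frees a given pointer has to see the same header, otherwise pointers from the system allocator and mimalloc get mixed, as the comment above warns.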
+-----------------------------------------------------------------------------*/ + +#include <mimalloc.h> + +// Standard C allocation +#define malloc(n) mi_malloc(n) +#define calloc(n,c) mi_calloc(n,c) +#define realloc(p,n) mi_realloc(p,n) +#define free(p) mi_free(p) + +#define strdup(s) mi_strdup(s) +#define strndup(s) mi_strndup(s) +#define realpath(f,n) mi_realpath(f,n) + +// Microsoft extensions +#define _expand(p,n) mi_expand(p,n) +#define _msize(p) mi_usable_size(p) +#define _recalloc(p,n,c) mi_recalloc(p,n,c) + +#define _strdup(s) mi_strdup(s) +#define _strndup(s) mi_strndup(s) +#define _wcsdup(s) (wchar_t*)mi_wcsdup((const unsigned short*)(s)) +#define _mbsdup(s) mi_mbsdup(s) +#define _dupenv_s(b,n,v) mi_dupenv_s(b,n,v) +#define _wdupenv_s(b,n,v) mi_wdupenv_s((unsigned short*)(b),n,(const unsigned short*)(v)) + +// Various Posix and Unix variants +#define reallocf(p,n) mi_reallocf(p,n) +#define malloc_size(p) mi_usable_size(p) +#define malloc_usable_size(p) mi_usable_size(p) +#define cfree(p) mi_free(p) + +#define valloc(n) mi_valloc(n) +#define pvalloc(n) mi_pvalloc(n) +#define reallocarray(p,s,n) mi_reallocarray(p,s,n) +#define memalign(a,n) mi_memalign(a,n) +#define aligned_alloc(a,n) mi_aligned_alloc(a,n) +#define posix_memalign(p,a,n) mi_posix_memalign(p,a,n) +#define _posix_memalign(p,a,n) mi_posix_memalign(p,a,n) + +// Microsoft aligned variants +#define _aligned_malloc(n,a) mi_malloc_aligned(n,a) +#define _aligned_realloc(p,n,a) mi_realloc_aligned(p,n,a) +#define _aligned_recalloc(p,s,n,a) mi_aligned_recalloc(p,s,n,a) +#define _aligned_msize(p,a,o) mi_usable_size(p) +#define _aligned_free(p) mi_free(p) +#define _aligned_offset_malloc(n,a,o) mi_malloc_aligned_at(n,a,o) +#define _aligned_offset_realloc(p,n,a,o) mi_realloc_aligned_at(p,n,a,o) +#define _aligned_offset_recalloc(p,s,n,a,o) mi_recalloc_aligned_at(p,s,n,a,o) + +#endif // MIMALLOC_OVERRIDE_H diff --git a/runtime/src/mimalloc/c/include/mimalloc-types.h b/runtime/src/mimalloc/c/include/mimalloc-types.h new file mode 100644 index 00000000000..bbb7bfdee18 --- /dev/null +++ b/runtime/src/mimalloc/c/include/mimalloc-types.h @@ -0,0 +1,431 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TYPES_H +#define MIMALLOC_TYPES_H + +#include <stddef.h> // ptrdiff_t +#include <stdint.h> // uintptr_t, uint16_t, etc +#include <mimalloc-atomic.h> // _Atomic + +// ------------------------------------------------------ +// Variants +// ------------------------------------------------------ + +// Define NDEBUG in the release version to disable assertions. +#if !KONAN_MI_MALLOC + #define NDEBUG +#endif + +// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). +// #define MI_STAT 1 + +// Define MI_SECURE to enable security mitigations +// #define MI_SECURE 1 // guard page around metadata +// #define MI_SECURE 2 // guard page around each mimalloc page +// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) +// #define MI_SECURE 4 // checks for double free.
(may be more expensive) + +#if !defined(MI_SECURE) +#define MI_SECURE 0 +#endif + +// Define MI_DEBUG for debug mode +// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. +// #define MI_DEBUG 2 // + internal assertion checks +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) +#if !defined(MI_DEBUG) +#if !defined(NDEBUG) || defined(_DEBUG) +#define MI_DEBUG 2 +#else +#define MI_DEBUG 0 +#endif +#endif + +// Encoded free lists allow detection of corrupted free lists +// and can detect buffer overflows and double `free`s. +#if (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_ENCODE_FREELIST 1 +#endif + +// ------------------------------------------------------ +// Platform specific values +// ------------------------------------------------------ + + +// ------------------------------------------------------ +// Size of a pointer. +// We assume that `sizeof(void*)==sizeof(intptr_t)` +// and it holds for all platforms we know of. +// +// However, the C standard only requires that: +// p == (void*)((intptr_t)p)) +// but we also need: +// i == (intptr_t)((void*)i) +// or otherwise one might define an intptr_t type that is larger than a pointer... +// ------------------------------------------------------ + +#if INTPTR_MAX == 9223372036854775807LL +# define MI_INTPTR_SHIFT (3) +#elif INTPTR_MAX == 2147483647LL +# define MI_INTPTR_SHIFT (2) +#else +#error platform must be 32 or 64 bits +#endif + +#define MI_INTPTR_SIZE (1<= 655360) +#error "define more bins" +#endif + +// The free lists use encoded next fields +// (Only actually encodes when MI_ENCODED_FREELIST is defined.) +typedef uintptr_t mi_encoded_t; + +// free lists contain blocks +typedef struct mi_block_s { + mi_encoded_t next; +} mi_block_t; + + +// The delayed flags are used for efficient multi-threaded free-ing +typedef enum mi_delayed_e { + MI_NO_DELAYED_FREE = 0, + MI_USE_DELAYED_FREE = 1, + MI_DELAYED_FREEING = 2, + MI_NEVER_DELAYED_FREE = 3 +} mi_delayed_t; + + +// The `in_full` and `has_aligned` page flags are put in a union to efficiently +// test if both are false (`full_aligned == 0`) in the `mi_free` routine. +typedef union mi_page_flags_s { + uint8_t full_aligned; + struct { + uint8_t in_full : 1; + uint8_t has_aligned : 1; + } x; +} mi_page_flags_t; + +// Thread free list. +// We use the bottom 2 bits of the pointer for mi_delayed_t flags +typedef uintptr_t mi_thread_free_t; + +// A page contains blocks of one specific size (`block_size`). +// Each page has three list of free blocks: +// `free` for blocks that can be allocated, +// `local_free` for freed blocks that are not yet available to `mi_malloc` +// `thread_free` for freed blocks by other threads +// The `local_free` and `thread_free` lists are migrated to the `free` list +// when it is exhausted. The separate `local_free` list is necessary to +// implement a monotonic heartbeat. The `thread_free` list is needed for +// avoiding atomic operations in the common case. +// +// `used - thread_freed` == actual blocks that are in use (alive) +// `used - thread_freed + |free| + |local_free| == capacity` +// +// note: we don't count `freed` (as |free|) instead of `used` to reduce +// the number of memory accesses in the `mi_page_all_free` function(s). 
+// note: the funny layout here is due to: +// - access is optimized for `mi_free` and `mi_page_alloc` +// - using `uint16_t` does not seem to slow things down +typedef struct mi_page_s { + // "owned" by the segment + uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` + uint8_t segment_in_use:1; // `true` if the segment allocated this page + uint8_t is_reset:1; // `true` if the page memory was reset + uint8_t is_committed:1; // `true` if the page virtual memory is committed + uint8_t is_zero_init:1; // `true` if the page was zero initialized + + // layout like this to optimize access in `mi_malloc` and `mi_free` + uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` + uint16_t reserved; // number of blocks reserved in memory + mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) + bool is_zero; // `true` if the blocks in the free list are zero initialized + + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + #ifdef MI_ENCODE_FREELIST + uintptr_t cookie; // random cookie to encode the free lists + #endif + size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` + volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads + + // less accessed info + size_t block_size; // size available in each block (always `>0`) + mi_heap_t* heap; // the owning heap + struct mi_page_s* next; // next page owned by this thread with the same `block_size` + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + + // improve page index calculation + // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word + #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) + void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain + #endif +} mi_page_t; + + + +typedef enum mi_page_kind_e { + MI_PAGE_SMALL, // small blocks go into 64kb pages inside a segment + MI_PAGE_MEDIUM, // medium blocks go into 512kb pages inside a segment + MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment + MI_PAGE_HUGE // huge blocks (>512kb) are put into a single page in a segment of the exact size (but still 2mb aligned) +} mi_page_kind_t; + +// Segments are large allocated memory blocks (2mb on 64 bit) from +// the OS. Inside segments we allocated fixed size _pages_ that +// contain blocks. +typedef struct mi_segment_s { + // memory fields + size_t memid; // id for the os-level memory manager + bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_committed; // `true` if the whole segment is eagerly committed + + // segment fields + struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` + struct mi_segment_s* prev; + volatile _Atomic(struct mi_segment_s*) abandoned_next; + size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) + size_t used; // count of pages in use (`used <= capacity`) + size_t capacity; // count of available pages (`#free + used`) + size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` + size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + + // layout like this to optimize access in `mi_free` + size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). + volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment + mi_page_kind_t page_kind; // kind of pages: small, large, or huge + mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages +} mi_segment_t; + + +// ------------------------------------------------------ +// Heaps +// Provide first-class heaps to allocate from. +// A heap just owns a set of pages for allocation and +// can only be allocate/reallocate from the thread that created it. +// Freeing blocks can be done from any thread though. +// Per thread, the segments are shared among its heaps. +// Per thread, there is always a default heap that is +// used for allocation; it is initialized to statically +// point to an empty heap to avoid initialization checks +// in the fast path. +// ------------------------------------------------------ + +// Thread local data +typedef struct mi_tld_s mi_tld_t; + +// Pages of a certain block size are held in a queue. +typedef struct mi_page_queue_s { + mi_page_t* first; + mi_page_t* last; + size_t block_size; +} mi_page_queue_t; + +#define MI_BIN_FULL (MI_BIN_HUGE+1) + +// A heap owns a set of pages. +struct mi_heap_s { + mi_tld_t* tld; + mi_page_t* pages_free_direct[MI_SMALL_WSIZE_MAX + 2]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. + mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") + volatile _Atomic(mi_block_t*) thread_delayed_free; + uintptr_t thread_id; // thread this heap belongs too + uintptr_t cookie; + uintptr_t random; // random number used for secure allocation + size_t page_count; // total number of pages in the `pages` queues. + bool no_reclaim; // `true` if this heap should not reclaim abandoned pages +}; + + + +// ------------------------------------------------------ +// Debug +// ------------------------------------------------------ + +#define MI_DEBUG_UNINIT (0xD0) +#define MI_DEBUG_FREED (0xDF) + + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); +#define mi_assert(expr) ((expr) ? 
(void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + +// ------------------------------------------------------ +// Statistics +// ------------------------------------------------------ + +#ifndef MI_STAT +#if (MI_DEBUG>0) +#define MI_STAT 2 +#else +#define MI_STAT 0 +#endif +#endif + +typedef struct mi_stat_count_s { + int64_t allocated; + int64_t freed; + int64_t peak; + int64_t current; +} mi_stat_count_t; + +typedef struct mi_stat_counter_s { + int64_t total; + int64_t count; +} mi_stat_counter_t; + +typedef struct mi_stats_s { + mi_stat_count_t segments; + mi_stat_count_t pages; + mi_stat_count_t reserved; + mi_stat_count_t committed; + mi_stat_count_t reset; + mi_stat_count_t page_committed; + mi_stat_count_t segments_abandoned; + mi_stat_count_t pages_abandoned; + mi_stat_count_t threads; + mi_stat_count_t huge; + mi_stat_count_t giant; + mi_stat_count_t malloc; + mi_stat_count_t segments_cache; + mi_stat_counter_t pages_extended; + mi_stat_counter_t mmap_calls; + mi_stat_counter_t commit_calls; + mi_stat_counter_t page_no_retire; + mi_stat_counter_t searches; + mi_stat_counter_t huge_count; + mi_stat_counter_t giant_count; +#if MI_STAT>1 + mi_stat_count_t normal[MI_BIN_HUGE+1]; +#endif +} mi_stats_t; + + +void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); +void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); + +#if (MI_STAT) +#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) +#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) +#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) +#else +#define mi_stat_increase(stat,amount) (void)0 +#define mi_stat_decrease(stat,amount) (void)0 +#define mi_stat_counter_increase(stat,amount) (void)0 +#endif + +#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) + + +// ------------------------------------------------------ +// Thread Local data +// ------------------------------------------------------ + +// Queue of segments +typedef struct mi_segment_queue_s { + mi_segment_t* first; + mi_segment_t* last; +} mi_segment_queue_t; + + +// Segments thread local data +typedef struct mi_segments_tld_s { + mi_segment_queue_t small_free; // queue of segments with free small pages + mi_segment_queue_t medium_free; // queue of segments with free medium pages + size_t count; // current number of segments; + size_t peak_count; // peak number of segments + size_t current_size; // current size of all segments + size_t peak_size; // peak size of all segments + size_t cache_count; // number of segments in the cache + size_t cache_size; // total size of all segments in the cache + mi_segment_t* cache; // (small) cache of segments + mi_stats_t* stats; // points to tld stats +} mi_segments_tld_t; + +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats +} mi_os_tld_t; + +// Thread local data +struct mi_tld_s { + unsigned long long heartbeat; // monotonic heartbeat count + bool recurse; // true if 
deferred was called; used to prevent infinite recursion. + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_segments_tld_t segments; // segment tld + mi_os_tld_t os; // os tld + mi_stats_t stats; // statistics +}; + +#endif diff --git a/runtime/src/mimalloc/c/include/mimalloc.h b/runtime/src/mimalloc/c/include/mimalloc.h new file mode 100644 index 00000000000..7f26896c039 --- /dev/null +++ b/runtime/src/mimalloc/c/include/mimalloc.h @@ -0,0 +1,330 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_H +#define MIMALLOC_H + +#define MI_MALLOC_VERSION 120 // major + 2 digits minor + +// ------------------------------------------------------ +// Compiler specific attributes +// ------------------------------------------------------ + +#ifdef __cplusplus + #if (__GNUC__ <= 5) || (_MSC_VER <= 1900) + #define mi_attr_noexcept throw() + #else + #define mi_attr_noexcept noexcept + #endif +#else + #define mi_attr_noexcept +#endif + +#ifdef _MSC_VER + #if !defined(MI_SHARED_LIB) + #define mi_decl_export + #elif defined(MI_SHARED_LIB_EXPORT) + #define mi_decl_export __declspec(dllexport) + #else + #define mi_decl_export __declspec(dllimport) + #endif + #if (_MSC_VER >= 1900) && !defined(__EDG__) + #define mi_decl_allocator __declspec(allocator) __declspec(restrict) + #else + #define mi_decl_allocator __declspec(restrict) + #endif + #define mi_decl_thread __declspec(thread) + #define mi_attr_malloc + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_cdecl __cdecl +#elif defined(__GNUC__) || defined(__clang__) + #define mi_decl_thread __thread + #define mi_decl_export __attribute__((visibility("default"))) + #define mi_decl_allocator + #define mi_attr_malloc __attribute__((malloc)) + #if defined(__clang_major__) && (__clang_major__ < 4) + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #else + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #endif + #define mi_cdecl // leads to warnings... 
__attribute__((cdecl)) +#else + #define mi_decl_thread __thread + #define mi_decl_export + #define mi_decl_allocator + #define mi_attr_malloc + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_cdecl +#endif + +// ------------------------------------------------------ +// Includes +// ------------------------------------------------------ + +#include <stddef.h> // size_t +#include <stdbool.h> // bool + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------------------------------------------ +// Standard malloc interface +// ------------------------------------------------------ + +mi_decl_export mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_export mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); + +mi_decl_export void mi_free(void* p) mi_attr_noexcept; +mi_decl_export char* mi_strdup(const char* s) mi_attr_noexcept; +mi_decl_export char* mi_strndup(const char* s, size_t n) mi_attr_noexcept; +mi_decl_export char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept; + +// ------------------------------------------------------ +// Extended functionality +// ------------------------------------------------------ +#define MI_SMALL_WSIZE_MAX (128) +#define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*)) + +mi_decl_export mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); + +mi_decl_export mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_export mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); + + +mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; +mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; + +typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; + +typedef void (mi_output_fun)(const char* msg); +mi_decl_export void mi_register_output(mi_output_fun* out) mi_attr_noexcept; + +mi_decl_export void mi_collect(bool force) mi_attr_noexcept; +mi_decl_export int mi_version(void) mi_attr_noexcept; +mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; +mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; +mi_decl_export void mi_stats_print(mi_output_fun* out) mi_attr_noexcept; + +mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_done(void) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept; + + +//
------------------------------------------------------------------------------------- +// Aligned allocation +// Note that `alignment` always follows `size` for consistency with unaligned +// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. +// ------------------------------------------------------------------------------------- + +mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_export mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); + + +// ------------------------------------------------------ +// Heaps +// ------------------------------------------------------ +struct mi_heap_s; +typedef struct mi_heap_s mi_heap_t; + +mi_decl_export mi_heap_t* mi_heap_new(void); +mi_decl_export void mi_heap_delete(mi_heap_t* heap); +mi_decl_export void mi_heap_destroy(mi_heap_t* heap); +mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap); +mi_decl_export mi_heap_t* mi_heap_get_default(void); +mi_decl_export mi_heap_t* mi_heap_get_backing(void); +mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept; + +mi_decl_export mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_export mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_export mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); + +mi_decl_export mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept; +mi_decl_export mi_decl_allocator void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); + +mi_decl_export char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept; +mi_decl_export char* 
mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept; +mi_decl_export char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept; + +mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); + + +// -------------------------------------------------------------------------------- +// Zero initialized re-allocation. +// Only valid on memory that was originally allocated with zero initialization too. +// e.g. `mi_calloc`, `mi_zalloc`, `mi_zalloc_aligned` etc. 
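A minimal sketch of this zero-initialized reallocation contract, assuming `mimalloc.h` is on the include path and the library is linked: memory obtained zeroed (here via `mi_calloc`) may be grown with `mi_recalloc`, and the newly added tail is zero as well.

// Sketch: grow a zero-initialized buffer with mi_recalloc.
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // 8 zero-initialized ints
  int* xs = (int*)mi_calloc(8, sizeof(int));
  if (xs == NULL) return 1;
  xs[0] = 7;

  // grow to 32 ints; elements 0..7 are preserved, 8..31 are zero
  xs = (int*)mi_recalloc(xs, 32, sizeof(int));
  if (xs == NULL) return 1;

  printf("xs[0]=%d xs[8]=%d xs[31]=%d\n", xs[0], xs[8], xs[31]);  // prints 7 0 0
  mi_free(xs);
  return 0;
}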
+// see +// -------------------------------------------------------------------------------- + +mi_decl_export mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); + +mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_export mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); + +mi_decl_export mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); + +mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); +mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); + + +// ------------------------------------------------------ +// Analysis +// ------------------------------------------------------ + +mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p); + +mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p); +mi_decl_export bool mi_check_owned(const void* p); + +// An area of heap space contains blocks of a single size. 
+typedef struct mi_heap_area_s { + void* blocks; // start of the area containing heap blocks + size_t reserved; // bytes reserved for this area (virtual) + size_t committed; // current available bytes for this area + size_t used; // bytes in use by allocated blocks + size_t block_size; // size in bytes of each block +} mi_heap_area_t; + +typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg); + +mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); + +// Experimental +mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; +mi_decl_export bool mi_is_redirected() mi_attr_noexcept; + +// ------------------------------------------------------ +// Convenience +// ------------------------------------------------------ + +#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp))) +#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp))) +#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp))) +#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp))) +#define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp))) +#define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp))) + +#define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp))) +#define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp))) +#define mi_heap_calloc_tp(hp,tp,n) ((tp*)mi_heap_calloc(hp,n,sizeof(tp))) +#define mi_heap_mallocn_tp(hp,tp,n) ((tp*)mi_heap_mallocn(hp,n,sizeof(tp))) +#define mi_heap_reallocn_tp(hp,p,tp,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp))) +#define mi_heap_recalloc_tp(hp,p,tp,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp))) + + +// ------------------------------------------------------ +// Options, all `false` by default +// ------------------------------------------------------ + +typedef enum mi_option_e { + // stable options + mi_option_show_errors, + mi_option_show_stats, + mi_option_verbose, + // the following options are experimental + mi_option_eager_commit, + mi_option_eager_region_commit, + mi_option_large_os_pages, // implies eager commit + mi_option_reserve_huge_os_pages, + mi_option_segment_cache, + mi_option_page_reset, + mi_option_cache_reset, + mi_option_reset_decommits, + mi_option_eager_commit_delay, + mi_option_segment_reset, + mi_option_os_tag, + mi_option_max_errors, + _mi_option_last +} mi_option_t; + + +mi_decl_export bool mi_option_is_enabled(mi_option_t option); +mi_decl_export void mi_option_enable(mi_option_t option); +mi_decl_export void mi_option_disable(mi_option_t option); +mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); +mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); + +mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_export void mi_option_set(mi_option_t option, long value); +mi_decl_export void mi_option_set_default(mi_option_t option, long value); + + +// ------------------------------------------------------------------------------------------------------- +// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions. +// (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.) 
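A short sketch combining the heap, analysis, and option entry points declared above: allocate into a private heap, walk its live blocks with `mi_heap_visit_blocks`, then release everything at once with `mi_heap_destroy` (again assuming the header is on the include path and the library is linked):

// Sketch: first-class heaps plus block visiting and the stats option.
#include <mimalloc.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool count_block(const mi_heap_t* heap, const mi_heap_area_t* area,
                        void* block, size_t block_size, void* arg) {
  (void)heap; (void)area;
  if (block != NULL) {          // some callbacks describe a whole area (block == NULL); skip those
    size_t* count = (size_t*)arg;
    *count += 1;
    printf("block %p of size %zu\n", block, block_size);
  }
  return true;                  // keep visiting
}

int main(void) {
  mi_option_set_enabled(mi_option_show_stats, true);  // statistics are printed at process exit

  mi_heap_t* heap = mi_heap_new();                     // heap with its own pages
  if (heap == NULL) return 1;
  for (int i = 1; i <= 4; i++) {
    if (mi_heap_malloc(heap, (size_t)i * 64) == NULL) return 1;
  }

  size_t count = 0;
  mi_heap_visit_blocks(heap, true /* visit individual blocks */, &count_block, &count);
  printf("heap contains %zu live blocks\n", count);

  mi_heap_destroy(heap);        // releases all remaining blocks of this heap at once
  return 0;
}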
+// ------------------------------------------------------------------------------------------------------- + +mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; +mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; +mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; +mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; + +mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; +mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); + +mi_decl_export void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); + +mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; +mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept; + +mi_decl_export unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept; +mi_decl_export unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept; +mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; +mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept; + +mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; +mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; +mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; + +mi_decl_export void* mi_new(size_t n) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned(size_t n, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_nothrow(size_t n) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/runtime/src/mimalloc/c/init.c b/runtime/src/mimalloc/c/init.c new file mode 100644 index 00000000000..81413aa92ff --- /dev/null +++ b/runtime/src/mimalloc/c/init.c @@ -0,0 +1,549 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memcpy, memset +#include // atexit + +// Empty page used to initialize the small free pages array +const mi_page_t _mi_page_empty = { + 0, false, false, false, false, 0, 0, + { 0 }, false, + NULL, // free + #if MI_ENCODE_FREELIST + 0, + #endif + 0, // used + NULL, + ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), + 0, NULL, NULL, NULL + #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) + , { NULL } // padding + #endif +}; + +#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) +#define MI_SMALL_PAGES_EMPTY \ + { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } + + +// Empty page queues for every bin +#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } +#define MI_PAGE_QUEUES_EMPTY \ + { QNULL(1), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ + QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ + QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \ + QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \ + QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \ + QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ + QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ + QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ + QNULL(MI_LARGE_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \ + QNULL(MI_LARGE_OBJ_WSIZE_MAX + 2) /* Full queue */ } + +#define MI_STAT_COUNT_NULL() {0,0,0,0} + +// Empty statistics +#if MI_STAT>1 +#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) } +#else +#define MI_STAT_COUNT_END_NULL() +#endif + +#define MI_STATS_NULL \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ + MI_STAT_COUNT_END_NULL() + +// -------------------------------------------------------- +// Statically allocate an empty heap as the initial +// thread local value for the default heap, +// and statically allocate the backing heap for the main +// thread so it can function without doing any allocation +// itself (as accessing a thread local for the first time +// may lead to allocation itself on some platforms) +// -------------------------------------------------------- + +const mi_heap_t _mi_heap_empty = { + NULL, + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + ATOMIC_VAR_INIT(NULL), + 0, + 0, + 0, + 0, + false +}; + +// the thread-local default heap for allocation +mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; + + +#define tld_main_stats 
((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) + +static mi_tld_t tld_main = { + 0, false, + &_mi_heap_main, + { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats +}; + +mi_heap_t _mi_heap_main = { + &tld_main, + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + NULL, + 0, // thread id +#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!) + 0xCDCDCDCDCDCDCDCDUL, +#else + 0xCDCDCDCDUL, +#endif + 0, // random + 0, // page count + false // can reclaim +}; + +bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. + +mi_stats_t _mi_stats_main = { MI_STATS_NULL }; + +/* ----------------------------------------------------------- + Initialization of random numbers +----------------------------------------------------------- */ + +#if defined(_WIN32) +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +uintptr_t _mi_random_shuffle(uintptr_t x) { + #if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; + #elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; + #endif + return x; +} + +uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { +#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse + uintptr_t x; + arc4random_buf(&x, sizeof x); +#else + // Hopefully, ASLR makes our function address random + uintptr_t x = (uintptr_t)((void*)&_mi_random_init); + x ^= seed; + // xor with high res time +#if defined(_WIN32) + LARGE_INTEGER pcount; + QueryPerformanceCounter(&pcount); + x ^= (uintptr_t)(pcount.QuadPart); +#elif defined(__APPLE__) + x ^= (uintptr_t)mach_absolute_time(); +#else + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + x ^= (uintptr_t)time.tv_sec; + x ^= (uintptr_t)time.tv_nsec; +#endif + // and do a few randomization steps + uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; + for (uintptr_t i = 0; i < max; i++) { + x = _mi_random_shuffle(x); + } +#endif + return x; +} + +/* ----------------------------------------------------------- + Initialization and freeing of the thread local heaps +----------------------------------------------------------- */ + +typedef struct mi_thread_data_s { + mi_heap_t heap; // must come first due to cast in `_mi_heap_done` + mi_tld_t tld; +} mi_thread_data_t; + +// Initialize the thread local default heap, called from `mi_thread_init` +static bool _mi_heap_init(void) { + if (mi_heap_is_initialized(_mi_heap_default)) return true; + if (_mi_is_main_thread()) { + // the main heap is statically allocated + _mi_heap_set_default_direct(&_mi_heap_main); + mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); + } + else { + // use `_mi_os_alloc` to allocate directly from the OS + mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation? 
+ if (td == NULL) { + _mi_error_message("failed to allocate thread local heap memory\n"); + return false; + } + mi_tld_t* tld = &td->tld; + mi_heap_t* heap = &td->heap; + memcpy(heap, &_mi_heap_empty, sizeof(*heap)); + heap->thread_id = _mi_thread_id(); + heap->random = _mi_random_init(heap->thread_id); + heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; + heap->tld = tld; + memset(tld, 0, sizeof(*tld)); + tld->heap_backing = heap; + tld->segments.stats = &tld->stats; + tld->os.stats = &tld->stats; + _mi_heap_set_default_direct(heap); + } + return false; +} + +// Free the thread local default heap (called from `mi_thread_done`) +static bool _mi_heap_done(mi_heap_t* heap) { + if (!mi_heap_is_initialized(heap)) return true; + + // reset default heap + _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); + + // todo: delete all non-backing heaps? + + // switch to backing heap and free it + heap = heap->tld->heap_backing; + if (!mi_heap_is_initialized(heap)) return false; + + // collect if not the main thread + if (heap != &_mi_heap_main) { + _mi_heap_collect_abandon(heap); + } + + // merge stats + _mi_stats_done(&heap->tld->stats); + + // free if not the main thread + if (heap != &_mi_heap_main) { + _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main); + } +#if (MI_DEBUG > 0) + else { + _mi_heap_destroy_pages(heap); + mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); + } +#endif + return false; +} + + + +// -------------------------------------------------------- +// Try to run `mi_thread_done()` automatically so any memory +// owned by the thread but not yet released can be abandoned +// and re-owned by another thread. +// +// 1. windows dynamic library: +// call from DllMain on DLL_THREAD_DETACH +// 2. windows static library: +// use `FlsAlloc` to call a destructor when the thread is done +// 3. unix, pthreads: +// use a pthread key to call a destructor when a pthread is done +// +// In the last two cases we also need to call `mi_process_init` +// to set up the thread local keys. 
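For platforms that end up in the `#pragma message` fallback below, or when deterministic teardown is preferred, a worker thread can bracket its allocations explicitly with the exported `mi_thread_init`/`mi_thread_done`; a minimal pthread-based sketch (on pthread and Windows builds the destructors registered below make the explicit `mi_thread_done` call optional):

// Sketch: explicit per-thread bracketing of the thread-local heap.
#include <mimalloc.h>
#include <pthread.h>
#include <stdio.h>

static void* worker(void* arg) {
  (void)arg;
  mi_thread_init();                      // ensure this thread has a thread-local heap
  void* buf = mi_malloc(4096);           // served from this thread's heap
  if (buf != NULL) {
    printf("worker allocated %zu usable bytes\n", mi_usable_size(buf));
    mi_free(buf);
  }
  mi_thread_done();                      // abandon/release the thread-local heap
  return NULL;
}

int main(void) {
  pthread_t t;
  if (pthread_create(&t, NULL, &worker, NULL) != 0) return 1;
  pthread_join(t, NULL);
  return 0;
}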
+// -------------------------------------------------------- + +static void _mi_thread_done(mi_heap_t* default_heap); + +#ifdef __wasi__ +// no pthreads in the WebAssembly Standard Interface +#elif !defined(_WIN32) +#define MI_USE_PTHREADS +#endif + +#if defined(_WIN32) && defined(MI_SHARED_LIB) + // nothing to do as it is done in DllMain +#elif defined(_WIN32) && !defined(MI_SHARED_LIB) + // use thread local storage keys to detect thread ending + #include + #include + static DWORD mi_fls_key; + static void NTAPI mi_fls_done(PVOID value) { + if (value!=NULL) _mi_thread_done((mi_heap_t*)value); + } +#elif defined(MI_USE_PTHREADS) + // use pthread locol storage keys to detect thread ending + #include + static pthread_key_t mi_pthread_key; + static void mi_pthread_done(void* value) { + if (value!=NULL) _mi_thread_done((mi_heap_t*)value); + } +#elif defined(__wasi__) +// no pthreads in the WebAssembly Standard Interface +#else + #pragma message("define a way to call mi_thread_done when a thread is done") +#endif + +// Set up handlers so `mi_thread_done` is called automatically +static void mi_process_setup_auto_thread_done(void) { + static bool tls_initialized = false; // fine if it races + if (tls_initialized) return; + tls_initialized = true; + #if defined(_WIN32) && defined(MI_SHARED_LIB) + // nothing to do as it is done in DllMain + #elif defined(_WIN32) && !defined(MI_SHARED_LIB) + mi_fls_key = FlsAlloc(&mi_fls_done); + #elif defined(MI_USE_PTHREADS) + pthread_key_create(&mi_pthread_key, &mi_pthread_done); + #endif +} + + +bool _mi_is_main_thread(void) { + return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); +} + +// This is called from the `mi_malloc_generic` +void mi_thread_init(void) mi_attr_noexcept +{ + // ensure our process has started already + mi_process_init(); + + // initialize the thread local default heap + // (this will call `_mi_heap_set_default_direct` and thus set the + // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) + if (_mi_heap_init()) return; // returns true if already initialized + + // don't further initialize for the main thread + if (_mi_is_main_thread()) return; + + _mi_stat_increase(&mi_get_default_heap()->tld->stats.threads, 1); + + //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); +} + +void mi_thread_done(void) mi_attr_noexcept { + _mi_thread_done(mi_get_default_heap()); +} + +static void _mi_thread_done(mi_heap_t* heap) { + // stats + if (!_mi_is_main_thread() && mi_heap_is_initialized(heap)) { + _mi_stat_decrease(&heap->tld->stats.threads, 1); + } + // abandon the thread local heap + if (_mi_heap_done(heap)) return; // returns true if already ran +} + +void _mi_heap_set_default_direct(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + _mi_heap_default = heap; + + // ensure the default heap is passed to `_mi_thread_done` + // setting to a non-NULL value also ensures `mi_thread_done` is called. 
+ #if defined(_WIN32) && defined(MI_SHARED_LIB) + // nothing to do as it is done in DllMain + #elif defined(_WIN32) && !defined(MI_SHARED_LIB) + FlsSetValue(mi_fls_key, heap); + #elif defined(MI_USE_PTHREADS) + pthread_setspecific(mi_pthread_key, heap); + #endif +} + + + +// -------------------------------------------------------- +// Run functions on process init/done, and thread init/done +// -------------------------------------------------------- +static void mi_process_done(void); + +static bool os_preloading = true; // true until this module is initialized +static bool mi_redirected = false; // true if malloc redirects to mi_malloc + +// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. +bool _mi_preloading() { + return os_preloading; +} + +bool mi_is_redirected() mi_attr_noexcept { + return mi_redirected; +} + +// Communicate with the redirection module on Windows +#if defined(_WIN32) && defined(MI_SHARED_LIB) +#ifdef __cplusplus +extern "C" { +#endif +mi_decl_export void _mi_redirect_entry(DWORD reason) { + // called on redirection; careful as this may be called before DllMain + if (reason == DLL_PROCESS_ATTACH) { + mi_redirected = true; + } + else if (reason == DLL_PROCESS_DETACH) { + mi_redirected = false; + } + else if (reason == DLL_THREAD_DETACH) { + mi_thread_done(); + } +} +__declspec(dllimport) bool mi_allocator_init(const char** message); +__declspec(dllimport) void mi_allocator_done(); +#ifdef __cplusplus +} +#endif +#else +static bool mi_allocator_init(const char** message) { + if (message != NULL) *message = NULL; + return true; +} +static void mi_allocator_done() { + // nothing to do +} +#endif + +// Called once by the process loader +static void mi_process_load(void) { + os_preloading = false; + atexit(&mi_process_done); + _mi_options_init(); + mi_process_init(); + //mi_stats_reset(); + if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); + + // show message from the redirector (if present) + const char* msg = NULL; + mi_allocator_init(&msg); + if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { + _mi_fputs(NULL,NULL,msg); + } + + if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) + mi_reserve_huge_os_pages(pages, max_secs, NULL); + } +} + +// Initialize the process; called by thread_init or the process loader +void mi_process_init(void) mi_attr_noexcept { + // ensure we are called once + if (_mi_process_is_initialized) return; + // access _mi_heap_default before setting _mi_process_is_initialized to ensure + // that the TLS slot is allocated without getting into recursion on macOS + // when using dynamic linking with interpose. 
+ mi_heap_t* h = mi_get_default_heap(); + _mi_process_is_initialized = true; + + _mi_heap_main.thread_id = _mi_thread_id(); + _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); + uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; + #ifndef __APPLE__ + _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; + #endif + _mi_heap_main.random = _mi_random_shuffle(random); + mi_process_setup_auto_thread_done(); + _mi_os_init(); + #if (MI_DEBUG) + _mi_verbose_message("debug level : %d\n", MI_DEBUG); + #endif + _mi_verbose_message("secure level: %d\n", MI_SECURE); + mi_thread_init(); + mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) +} + +// Called when the process is done (through `at_exit`) +static void mi_process_done(void) { + // only shutdown if we were initialized + if (!_mi_process_is_initialized) return; + // ensure we are called once + static bool process_done = false; + if (process_done) return; + process_done = true; + + #ifndef NDEBUG + mi_collect(true); + #endif + if (mi_option_is_enabled(mi_option_show_stats) || + mi_option_is_enabled(mi_option_verbose)) { + mi_stats_print(NULL); + } + mi_allocator_done(); + _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); + os_preloading = true; // don't call the C runtime anymore +} + + + +#if defined(_WIN32) && defined(MI_SHARED_LIB) + // Windows DLL: easy to hook into process_init and thread_done + __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + UNUSED(reserved); + UNUSED(inst); + if (reason==DLL_PROCESS_ATTACH) { + mi_process_load(); + } + else if (reason==DLL_THREAD_DETACH) { + if (!mi_is_redirected()) mi_thread_done(); + } + return TRUE; + } + +#elif defined(__cplusplus) + // C++: use static initialization to detect process start + static bool _mi_process_init(void) { + mi_process_load(); + return (_mi_heap_main.thread_id != 0); + } + static bool mi_initialized = _mi_process_init(); + +#elif defined(__GNUC__) || defined(__clang__) + // GCC,Clang: use the constructor attribute + static void __attribute__((constructor)) _mi_process_init(void) { + mi_process_load(); + } + +#elif defined(_MSC_VER) + // MSVC: use data section magic for static libraries + // See + static int _mi_process_init(void) { + mi_process_load(); + return 0; + } + typedef int(*_crt_cb)(void); + #ifdef _M_X64 + __pragma(comment(linker, "/include:" "_mi_msvc_initu")) + #pragma section(".CRT$XIU", long, read) + #else + __pragma(comment(linker, "/include:" "__mi_msvc_initu")) + #endif + #pragma data_seg(".CRT$XIU") + _crt_cb _mi_msvc_initu[] = { &_mi_process_init }; + #pragma data_seg() + +#else +#pragma message("define a way to call mi_process_load on your platform") +#endif diff --git a/runtime/src/mimalloc/c/memory.c b/runtime/src/mimalloc/c/memory.c new file mode 100644 index 00000000000..dd03cf9565f --- /dev/null +++ b/runtime/src/mimalloc/c/memory.c @@ -0,0 +1,546 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
+and the segment and huge object allocation by mimalloc. There may be multiple
+implementations of this (one could be the identity going directly to the OS,
+another could be a simple cache etc), but the current one uses large "regions".
+In contrast to the rest of mimalloc, the "regions" are shared between threads and
+need to be accessed using atomic operations.
+We need this memory layer between the raw OS calls because of:
+1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
+   to reuse memory effectively.
+2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
+   an OS allocation/free is still (much) too expensive relative to the accesses in that
+   object :-( (`malloc-large` tests this). This means we need a cheaper way to
+   reuse memory.
+3. This layer can help with a NUMA aware allocation in the future.
+
+Possible issues:
+- (2) can potentially be addressed too with a small cache per thread which is much
+  simpler. Generally though that requires shrinking of huge pages, and may overuse
+  memory per thread. (and is not compatible with `sbrk`).
+- Since the current regions are per-process, we need atomic operations to
+  claim blocks which may be contended
+- In the worst case, we need to search the whole region map (16KiB for 256GiB)
+  linearly. At what point will direct OS calls be faster? Is there a way to
+  do this better without adding too much complexity?
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include <string.h>  // memset
+
+// Internal raw OS interface
+size_t  _mi_os_large_page_size();
+bool    _mi_os_protect(void* addr, size_t size);
+bool    _mi_os_unprotect(void* addr, size_t size);
+bool    _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+bool    _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
+bool    _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
+bool    _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+void*   _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
+void    _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
+void*   _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
+bool    _mi_os_is_huge_reserved(void* p);
+
+// Constants
+#if (MI_INTPTR_SIZE==8)
+#define MI_HEAP_REGION_MAX_SIZE   (256 * (1ULL << 30))  // 256GiB => 16KiB for the region map
+#elif (MI_INTPTR_SIZE==4)
+#define MI_HEAP_REGION_MAX_SIZE   (3 * (1UL << 30))  // 3GiB => 196 bytes for the region map
+#else
+#error "define the maximum heap space allowed for regions on this platform"
+#endif
+
+#define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE
+
+#define MI_REGION_MAP_BITS        (MI_INTPTR_SIZE * 8)
+#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS)
+#define MI_REGION_MAX_ALLOC_SIZE  ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE)  // 64MiB
+#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
+#define MI_REGION_MAP_FULL        UINTPTR_MAX
+
+
+typedef uintptr_t mi_region_info_t;
+
+static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) {
+  return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0));
+}
+
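For illustration only (a hedged sketch with hypothetical values, not part of the patch): because every region start is MI_SEGMENT_SIZE (4MiB) aligned, its two low bits are zero and are free to carry the flags, which is what the packed info word above relies on and what mi_region_info_read below recovers.

#include <stdbool.h>
#include <stdint.h>

// Sketch: pack and unpack a region info word by hand for a hypothetical
// 4MiB-aligned start address; mirrors mi_region_info_create / mi_region_info_read.
static void example_region_info(void) {
  uintptr_t start = (uintptr_t)0x7f0000000000ULL;          // 4MiB aligned => low bits are zero
  uintptr_t info  = start | (1u << 1) | 0u;                // bit 1: is_large = true, bit 0: is_committed = false
  void* unpacked     = (void*)(info & ~(uintptr_t)0x03);   // recovers the original start
  bool  is_large     = (info & 0x02) != 0;                 // true
  bool  is_committed = (info & 0x01) != 0;                 // false
  (void)unpacked; (void)is_large; (void)is_committed;
}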
+static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { + if (is_large) *is_large = ((info&0x02) != 0); + if (is_committed) *is_committed = ((info&0x01) != 0); + return (void*)(info & ~0x03); +} + + +// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with +// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. +typedef struct mem_region_s { + volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block + volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags + volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd +} mem_region_t; + + +// The region map; 16KiB for a 256GiB HEAP_REGION_MAX +// TODO: in the future, maintain a map per NUMA node for numa aware allocation +static mem_region_t regions[MI_REGION_MAX]; + +static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions + + +/* ---------------------------------------------------------------------------- +Utility functions +-----------------------------------------------------------------------------*/ + +// Blocks (of 4MiB) needed for the given size. +static size_t mi_region_block_count(size_t size) { + mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); + return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; +} + +// The bit mask for a given number of blocks at a specified bit index. +static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) { + mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS); + return ((((uintptr_t)1 << blocks) - 1) << bitidx); +} + +// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. +static size_t mi_good_commit_size(size_t size) { + if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; + return _mi_align_up(size, _mi_os_large_page_size()); +} + +// Return if a pointer points into a region reserved by us. +bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + if (p==NULL) return false; + size_t count = mi_atomic_read_relaxed(®ions_count); + for (size_t i = 0; i < count; i++) { + uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(®ions[i].info), NULL, NULL); + if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; + } + return false; +} + + +/* ---------------------------------------------------------------------------- +Commit from a region +-----------------------------------------------------------------------------*/ + +// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. +// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written +// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. +// (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
+static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, + size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) +{ + size_t mask = mi_region_block_mask(blocks,bitidx); + mi_assert_internal(mask != 0); + mi_assert_internal((mask & mi_atomic_read_relaxed(®ion->map)) == mask); + mi_assert_internal(®ions[idx] == region); + + // ensure the region is reserved + mi_region_info_t info = mi_atomic_read(®ion->info); + if (info == 0) + { + bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); + bool region_large = *allow_large; + void* start = NULL; + if (region_large) { + start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); + if (start != NULL) { region_commit = true; } + } + if (start == NULL) { + start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); + } + mi_assert_internal(!(region_large && !*allow_large)); + + if (start == NULL) { + // failure to allocate from the OS! unclaim the blocks and fail + size_t map; + do { + map = mi_atomic_read_relaxed(®ion->map); + } while (!mi_atomic_cas_weak(®ion->map, map & ~mask, map)); + return false; + } + + // set the newly allocated region + info = mi_region_info_create(start,region_large,region_commit); + if (mi_atomic_cas_strong(®ion->info, info, 0)) { + // update the region count + mi_atomic_increment(®ions_count); + } + else { + // failed, another thread allocated just before us! + // we assign it to a later slot instead (up to 4 tries). + for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { + if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { + mi_atomic_increment(®ions_count); + start = NULL; + break; + } + } + if (start != NULL) { + // free it if we didn't succeed to save it to some other region + _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); + } + // and continue with the memory at our index + info = mi_atomic_read(®ion->info); + } + } + mi_assert_internal(info == mi_atomic_read(®ion->info)); + mi_assert_internal(info != 0); + + // Commit the blocks to memory + bool region_is_committed = false; + bool region_is_large = false; + void* start = mi_region_info_read(info,®ion_is_large,®ion_is_committed); + mi_assert_internal(!(region_is_large && !*allow_large)); + mi_assert_internal(start!=NULL); + + // set dirty bits + uintptr_t m; + do { + m = mi_atomic_read(®ion->dirty_mask); + } while (!mi_atomic_cas_weak(®ion->dirty_mask, m | mask, m)); + *is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range? 
+ + void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + if (*commit && !region_is_committed) { + // ensure commit + bool commit_zero = false; + _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages) + if (commit_zero) *is_zero = true; + } + else if (!*commit && region_is_committed) { + // but even when no commit is requested, we might have committed anyway (in a huge OS page for example) + *commit = true; + } + + // and return the allocation + mi_assert_internal(blocks_start != NULL); + *allow_large = region_is_large; + *p = blocks_start; + *id = (idx*MI_REGION_MAP_BITS) + bitidx; + return true; +} + +// Use bit scan forward to quickly find the first zero bit if it is available +#if defined(_MSC_VER) +#define MI_HAVE_BITSCAN +#include +static inline size_t mi_bsf(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + #if (MI_INTPTR_SIZE==8) + _BitScanForward64(&idx, x); + #else + _BitScanForward(&idx, x); + #endif + return idx; +} +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + #if (MI_INTPTR_SIZE==8) + _BitScanReverse64(&idx, x); + #else + _BitScanReverse(&idx, x); + #endif + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +#define MI_HAVE_BITSCAN +static inline size_t mi_bsf(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x)); +} +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); +} +#endif + +// Allocate `blocks` in a `region` at `idx` of a given `size`. +// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written +// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. +// (not being able to claim is not considered an error so check for `p != NULL` afterwards). +static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, + bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) +{ + mi_assert_internal(p != NULL && id != NULL); + mi_assert_internal(blocks < MI_REGION_MAP_BITS); + + const uintptr_t mask = mi_region_block_mask(blocks, 0); + const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; + uintptr_t map = mi_atomic_read(®ion->map); + if (map==MI_REGION_MAP_FULL) return true; + + #ifdef MI_HAVE_BITSCAN + size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible + #else + size_t bitidx = 0; // otherwise start at 0 + #endif + uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + + // scan linearly for a free range of zero bits + while(bitidx <= bitidx_max) { + if ((map & m) == 0) { // are the mask bits free at bitidx? + mi_assert_internal((m >> bitidx) == mask); // no overflow? + uintptr_t newmap = map | m; + mi_assert_internal((newmap^map) >> bitidx == mask); + if (!mi_atomic_cas_weak(®ion->map, newmap, map)) { // TODO: use strong cas here? + // no success, another thread claimed concurrently.. keep going + map = mi_atomic_read(®ion->map); + continue; + } + else { + // success, we claimed the bits + // now commit the block memory -- this can still fail + return mi_region_commit_blocks(region, idx, bitidx, blocks, + size, commit, allow_large, is_zero, p, id, tld); + } + } + else { + // on to the next bit range + #ifdef MI_HAVE_BITSCAN + size_t shift = (blocks == 1 ? 
1 : mi_bsr(map & m) - bitidx + 1); + mi_assert_internal(shift > 0 && shift <= blocks); + #else + size_t shift = 1; + #endif + bitidx += shift; + m <<= shift; + } + } + // no error, but also no bits found + return true; +} + +// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. +// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written +// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. +// (not being able to claim is not considered an error so check for `p != NULL` afterwards). +static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, + bool* commit, bool* allow_large, bool* is_zero, + void** p, size_t* id, mi_os_tld_t* tld) +{ + // check if there are available blocks in the region.. + mi_assert_internal(idx < MI_REGION_MAX); + mem_region_t* region = ®ions[idx]; + uintptr_t m = mi_atomic_read_relaxed(®ion->map); + if (m != MI_REGION_MAP_FULL) { // some bits are zero + bool ok = (*commit || *allow_large); // committing or allow-large is always ok + if (!ok) { + // otherwise skip incompatible regions if possible. + // this is not guaranteed due to multiple threads allocating at the same time but + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. + mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); + bool is_large; + bool is_committed; + void* start = mi_region_info_read(info,&is_large,&is_committed); + ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? + } + if (ok) { + return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld); + } + } + return true; // no error, but no success either +} + +/* ---------------------------------------------------------------------------- + Allocation +-----------------------------------------------------------------------------*/ + +// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. +// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, + size_t* id, mi_os_tld_t* tld) +{ + mi_assert_internal(id != NULL && tld != NULL); + mi_assert_internal(size > 0); + *id = SIZE_MAX; + *is_zero = false; + bool default_large = false; + if (large==NULL) large = &default_large; // ensure `large != NULL` + + // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) + if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { + *is_zero = true; + return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size + } + + // always round size to OS page size multiple (so commit/decommit go over the entire range) + // TODO: use large OS page size here? + size = _mi_align_up(size, _mi_os_page_size()); + + // calculate the number of needed blocks + size_t blocks = mi_region_block_count(size); + mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); + + // find a range of free blocks + void* p = NULL; + size_t count = mi_atomic_read(®ions_count); + size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? 
+ for (size_t visited = 0; visited < count; visited++, idx++) { + if (idx >= count) idx = 0; // wrap around + if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error + if (p != NULL) break; + } + + if (p == NULL) { + // no free range in existing regions -- try to extend beyond the count.. but at most 8 regions + for (idx = count; idx < mi_atomic_read_relaxed(®ions_count) + 8 && idx < MI_REGION_MAX; idx++) { + if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error + if (p != NULL) break; + } + } + + if (p == NULL) { + // we could not find a place to allocate, fall back to the os directly + _mi_warning_message("unable to allocate from region: size %zu\n", size); + *is_zero = true; + p = _mi_os_alloc_aligned(size, alignment, commit, large, tld); + } + else { + tld->region_idx = idx; // next start of search? currently not used as we use first-fit + } + + mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); + return p; +} + + + +/* ---------------------------------------------------------------------------- +Free +-----------------------------------------------------------------------------*/ + +// Free previously allocated memory with a given id. +void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); + if (p==NULL) return; + if (size==0) return; + if (id == SIZE_MAX) { + // was a direct OS allocation, pass through + _mi_os_free(p, size, stats); + } + else { + // allocated in a region + mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; + // we can align the size up to page size (as we allocate that way too) + // this ensures we fully commit/decommit/reset + size = _mi_align_up(size, _mi_os_page_size()); + size_t idx = (id / MI_REGION_MAP_BITS); + size_t bitidx = (id % MI_REGION_MAP_BITS); + size_t blocks = mi_region_block_count(size); + size_t mask = mi_region_block_mask(blocks, bitidx); + mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? + mem_region_t* region = ®ions[idx]; + mi_assert_internal((mi_atomic_read_relaxed(®ion->map) & mask) == mask ); // claimed? + mi_region_info_t info = mi_atomic_read(®ion->info); + bool is_large; + bool is_eager_committed; + void* start = mi_region_info_read(info,&is_large,&is_eager_committed); + mi_assert_internal(start != NULL); + void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + mi_assert_internal(blocks_start == p); // not a pointer in our area? + mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); + if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? + + // decommit (or reset) the blocks to reduce the working set. + // TODO: implement delayed decommit/reset as these calls are too expensive + // if the memory is reused soon. + // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large + if (!is_large) { + if (mi_option_is_enabled(mi_option_segment_reset)) { + if (!is_eager_committed && // cannot reset large pages + (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead + mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments + { + _mi_os_reset(p, size, stats); + //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? 
+ } + } + } + if (!is_eager_committed) { + // adjust commit statistics as we commit again when re-using the same slot + _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); + } + + // TODO: should we free empty regions? currently only done _mi_mem_collect. + // this frees up virtual address space which might be useful on 32-bit systems? + + // and unclaim + uintptr_t map; + uintptr_t newmap; + do { + map = mi_atomic_read_relaxed(®ion->map); + newmap = map & ~mask; + } while (!mi_atomic_cas_weak(®ion->map, newmap, map)); + } +} + + +/* ---------------------------------------------------------------------------- + collection +-----------------------------------------------------------------------------*/ +void _mi_mem_collect(mi_stats_t* stats) { + // free every region that has no segments in use. + for (size_t i = 0; i < regions_count; i++) { + mem_region_t* region = ®ions[i]; + if (mi_atomic_read_relaxed(®ion->map) == 0) { + // if no segments used, try to claim the whole region + uintptr_t m; + do { + m = mi_atomic_read_relaxed(®ion->map); + } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); + if (m == 0) { + // on success, free the whole region (unless it was huge reserved) + bool is_eager_committed; + void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); + if (start != NULL && !_mi_os_is_huge_reserved(start)) { + _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats); + } + // and release + mi_atomic_write(®ion->info,0); + mi_atomic_write(®ion->map,0); + } + } + } +} + +/* ---------------------------------------------------------------------------- + Other +-----------------------------------------------------------------------------*/ + +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { + return _mi_os_commit(p, size, is_zero, stats); +} + +bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { + return _mi_os_decommit(p, size, stats); +} + +bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { + return _mi_os_reset(p, size, stats); +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { + return _mi_os_unreset(p, size, is_zero, stats); +} + +bool _mi_mem_protect(void* p, size_t size) { + return _mi_os_protect(p, size); +} + +bool _mi_mem_unprotect(void* p, size_t size) { + return _mi_os_unprotect(p, size); +} diff --git a/runtime/src/mimalloc/c/options.c b/runtime/src/mimalloc/c/options.c new file mode 100644 index 00000000000..d6b0558b0f6 --- /dev/null +++ b/runtime/src/mimalloc/c/options.c @@ -0,0 +1,410 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include +#include // strtol +#include // strncpy, strncat, strlen, strstr +#include // toupper +#include + +static uintptr_t mi_max_error_count = 16; // stop outputting errors after this + +static void mi_add_stderr_output(); + +int mi_version(void) mi_attr_noexcept { + return MI_MALLOC_VERSION; +} + +#ifdef _WIN32 +#include +#endif + +// -------------------------------------------------------- +// Options +// These can be accessed by multiple threads and may be +// concurrently initialized, but an initializing data race +// is ok since they resolve to the same value. +// -------------------------------------------------------- +typedef enum mi_init_e { + UNINIT, // not yet initialized + DEFAULTED, // not found in the environment, use default value + INITIALIZED // found in environment or set explicitly +} mi_init_t; + +typedef struct mi_option_desc_s { + long value; // the value + mi_init_t init; // is it initialized yet? (from the environment) + mi_option_t option; // for debugging: the option index should match the option + const char* name; // option name without `mimalloc_` prefix +} mi_option_desc_t; + +#define MI_OPTION(opt) mi_option_##opt, #opt +#define MI_OPTION_DESC(opt) {0, UNINIT, MI_OPTION(opt) } + +static mi_option_desc_t options[_mi_option_last] = +{ + // stable options + { MI_DEBUG, UNINIT, MI_OPTION(show_errors) }, + { 0, UNINIT, MI_OPTION(show_stats) }, + { 0, UNINIT, MI_OPTION(verbose) }, + + // the following options are experimental and not all combinations make sense. + { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled + #ifdef _WIN32 // and BSD? + { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) + #else + { 1, UNINIT, MI_OPTION(eager_region_commit) }, + #endif + { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, + { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(page_reset) }, + { 0, UNINIT, MI_OPTION(cache_reset) }, + { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output +}; + +static void mi_option_init(mi_option_desc_t* desc); + +void _mi_options_init(void) { + // called on process load; should not be called before the CRT is initialized! + // (e.g. 
do not call this from process_init as that may run before CRT initialization) + mi_add_stderr_output(); // now it safe to use stderr for output + for(int i = 0; i < _mi_option_last; i++ ) { + mi_option_t option = (mi_option_t)i; + mi_option_get(option); // initialize + if (option != mi_option_verbose) { + mi_option_desc_t* desc = &options[option]; + _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); + } + } + mi_max_error_count = mi_option_get(mi_option_max_errors); +} + +long mi_option_get(mi_option_t option) { + mi_assert(option >= 0 && option < _mi_option_last); + mi_option_desc_t* desc = &options[option]; + mi_assert(desc->option == option); // index should match the option + if (mi_unlikely(desc->init == UNINIT)) { + mi_option_init(desc); + } + return desc->value; +} + +void mi_option_set(mi_option_t option, long value) { + mi_assert(option >= 0 && option < _mi_option_last); + mi_option_desc_t* desc = &options[option]; + mi_assert(desc->option == option); // index should match the option + desc->value = value; + desc->init = INITIALIZED; +} + +void mi_option_set_default(mi_option_t option, long value) { + mi_assert(option >= 0 && option < _mi_option_last); + mi_option_desc_t* desc = &options[option]; + if (desc->init != INITIALIZED) { + desc->value = value; + } +} + +bool mi_option_is_enabled(mi_option_t option) { + return (mi_option_get(option) != 0); +} + +void mi_option_set_enabled(mi_option_t option, bool enable) { + mi_option_set(option, (enable ? 1 : 0)); +} + +void mi_option_set_enabled_default(mi_option_t option, bool enable) { + mi_option_set_default(option, (enable ? 1 : 0)); +} + +void mi_option_enable(mi_option_t option) { + mi_option_set_enabled(option,true); +} + +void mi_option_disable(mi_option_t option) { + mi_option_set_enabled(option,false); +} + + +static void mi_out_stderr(const char* msg) { + #ifdef _WIN32 + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. + if (!_mi_preloading()) { _cputs(msg); } + #else + fputs(msg, stderr); + #endif +} + +// Since an output function can be registered earliest in the `main` +// function we also buffer output that happens earlier. When +// an output function is registered it is called immediately with +// the output up to that point. +#ifndef MI_MAX_DELAY_OUTPUT +#define MI_MAX_DELAY_OUTPUT (32*1024) +#endif +static char out_buf[MI_MAX_DELAY_OUTPUT+1]; +static _Atomic(uintptr_t) out_len; + +static void mi_out_buf(const char* msg) { + if (msg==NULL) return; + if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; + size_t n = strlen(msg); + if (n==0) return; + // claim space + uintptr_t start = mi_atomic_addu(&out_len, n); + if (start >= MI_MAX_DELAY_OUTPUT) return; + // check bound + if (start+n >= MI_MAX_DELAY_OUTPUT) { + n = MI_MAX_DELAY_OUTPUT-start-1; + } + memcpy(&out_buf[start], msg, n); +} + +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { + if (out==NULL) return; + // claim (if `no_more_buf == true`, no more output will be added after this point) + size_t count = mi_atomic_addu(&out_len, (no_more_buf ? 
MI_MAX_DELAY_OUTPUT : 1)); + // and output the current contents + if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; + out_buf[count] = 0; + out(out_buf); + if (!no_more_buf) { + out_buf[count] = '\n'; // if continue with the buffer, insert a newline + } +} + + +// Once this module is loaded, switch to this routine +// which outputs to stderr and the delayed output buffer. +static void mi_out_buf_stderr(const char* msg) { + mi_out_stderr(msg); + mi_out_buf(msg); +} + + + +// -------------------------------------------------------- +// Default output handler +// -------------------------------------------------------- + +// Should be atomic but gives errors on many platforms as generally we cannot cast a function pointer to a uintptr_t. +// For now, don't register output from multiple threads. +#pragma warning(suppress:4180) +static mi_output_fun* volatile mi_out_default; // = NULL + +static mi_output_fun* mi_out_get_default(void) { + mi_output_fun* out = mi_out_default; + return (out == NULL ? &mi_out_buf : out); +} + +void mi_register_output(mi_output_fun* out) mi_attr_noexcept { + mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer + if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now +} + +// add stderr to the delayed output after the module is loaded +static void mi_add_stderr_output() { + mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output +} + +// -------------------------------------------------------- +// Messages, all end up calling `_mi_fputs`. +// -------------------------------------------------------- +#define MAX_ERROR_COUNT (10) +static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings + +// When overriding malloc, we may recurse into mi_vfprintf if an allocation +// inside the C runtime causes another message. +static mi_decl_thread bool recurse = false; + +void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { + if (recurse) return; + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); + recurse = true; + if (prefix != NULL) out(prefix); + out(message); + recurse = false; + return; +} + +// Define our own limited `fprintf` that avoids memory allocation. +// We do this using `snprintf` with a limited buffer. +static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { + char buf[512]; + if (fmt==NULL) return; + if (recurse) return; + recurse = true; + vsnprintf(buf,sizeof(buf)-1,fmt,args); + recurse = false; + _mi_fputs(out,prefix,buf); +} + + +void _mi_fprintf( mi_output_fun* out, const char* fmt, ... ) { + va_list args; + va_start(args,fmt); + mi_vfprintf(out,NULL,fmt,args); + va_end(args); +} + +void _mi_trace_message(const char* fmt, ...) { + if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher + va_list args; + va_start(args, fmt); + mi_vfprintf(NULL, "mimalloc: ", fmt, args); + va_end(args); +} + +void _mi_verbose_message(const char* fmt, ...) { + if (!mi_option_is_enabled(mi_option_verbose)) return; + va_list args; + va_start(args,fmt); + mi_vfprintf(NULL, "mimalloc: ", fmt, args); + va_end(args); +} + +void _mi_error_message(const char* fmt, ...) 
{ + if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; + va_list args; + va_start(args,fmt); + mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); + va_end(args); + mi_assert(false); +} + +void _mi_warning_message(const char* fmt, ...) { + if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; + va_list args; + va_start(args,fmt); + mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); + va_end(args); +} + + +#if MI_DEBUG +void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { + _mi_fprintf(NULL,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + abort(); +} +#endif + +mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) { + va_list args; + va_start(args, fmt); + mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args); + va_end(args); + #if (MI_SECURE>=0) + abort(); + #endif +} + +// -------------------------------------------------------- +// Initialize options by checking the environment +// -------------------------------------------------------- + +static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { + dest[0] = 0; + #pragma warning(suppress:4996) + strncpy(dest, src, dest_size - 1); + dest[dest_size - 1] = 0; +} + +static void mi_strlcat(char* dest, const char* src, size_t dest_size) { + #pragma warning(suppress:4996) + strncat(dest, src, dest_size - 1); + dest[dest_size - 1] = 0; +} + +#if defined _WIN32 +// On Windows use GetEnvironmentVariable instead of getenv to work +// reliably even when this is invoked before the C runtime is initialized. +// i.e. when `_mi_preloading() == true`. +// Note: on windows, environment names are not case sensitive. +#include +static bool mi_getenv(const char* name, char* result, size_t result_size) { + result[0] = 0; + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + return (len > 0 && len < result_size); +} +#else +static bool mi_getenv(const char* name, char* result, size_t result_size) { + const char* s = getenv(name); + if (s == NULL) { + // in unix environments we check the upper case name too. + char buf[64+1]; + size_t len = strlen(name); + if (len >= sizeof(buf)) len = sizeof(buf) - 1; + for (size_t i = 0; i < len; i++) { + buf[i] = toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s != NULL && strlen(s) < result_size) { + mi_strlcpy(result, s, result_size); + return true; + } + else { + return false; + } +} +#endif +static void mi_option_init(mi_option_desc_t* desc) { + #ifndef _WIN32 + // cannot call getenv() when still initializing the C runtime. 
+ if (_mi_preloading()) return; + #endif + // Read option value from the environment + char buf[64+1]; + mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + mi_strlcat(buf, desc->name, sizeof(buf)); + char s[64+1]; + if (mi_getenv(buf, s, sizeof(s))) { + size_t len = strlen(s); + if (len >= sizeof(buf)) len = sizeof(buf) - 1; + for (size_t i = 0; i < len; i++) { + buf[i] = (char)toupper(s[i]); + } + buf[len] = 0; + if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { + desc->value = 1; + desc->init = INITIALIZED; + } + else if (strstr("0;FALSE;NO;OFF", buf) != NULL) { + desc->value = 0; + desc->init = INITIALIZED; + } + else { + char* end = buf; + long value = strtol(buf, &end, 10); + if (*end == 0) { + desc->value = value; + desc->init = INITIALIZED; + } + else { + _mi_warning_message("environment option mimalloc_%s has an invalid value: %s\n", desc->name, buf); + desc->init = DEFAULTED; + } + } + } + else { + desc->init = DEFAULTED; + } + mi_assert_internal(desc->init != UNINIT); +} diff --git a/runtime/src/mimalloc/c/os.c b/runtime/src/mimalloc/c/os.c new file mode 100644 index 00000000000..8f5afc5b469 --- /dev/null +++ b/runtime/src/mimalloc/c/os.c @@ -0,0 +1,950 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // ensure mmap flags are defined +#endif + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // strerror +#include + +#if defined(_WIN32) +#include +#elif defined(__wasi__) +// stdlib.h is all we need, and has already been included in mimalloc.h +#else +#include // mmap +#include // sysconf +#if defined(__linux__) +#include // linux mmap flags +#endif +#if defined(__APPLE__) +#include +#endif +#endif + +/* ----------------------------------------------------------- + Initialization. + On windows initializes support for aligned allocation and + large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). +----------------------------------------------------------- */ +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_is_huge_reserved(void* p); +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); + +static void* mi_align_up_ptr(void* p, size_t alignment) { + return (void*)_mi_align_up((uintptr_t)p, alignment); +} + +static uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + return (sz / alignment) * alignment; +} + +static void* mi_align_down_ptr(void* p, size_t alignment) { + return (void*)_mi_align_down((uintptr_t)p, alignment); +} + +// page size (initialized properly in `os_init`) +static size_t os_page_size = 4096; + +// minimal allocation granularity +static size_t os_alloc_granularity = 4096; + +// if non-zero, use large page allocation +static size_t large_os_page_size = 0; + +// OS (small) page size +size_t _mi_os_page_size() { + return os_page_size; +} + +// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) +size_t _mi_os_large_page_size() { + return (large_os_page_size != 0 ? 
large_os_page_size : _mi_os_page_size()); +} + +static bool use_large_os_page(size_t size, size_t alignment) { + // if we have access, check the size and alignment requirements + if (large_os_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; + return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0); +} + +// round to a good OS allocation size (bounded by max 12.5% waste) +size_t _mi_os_good_alloc_size(size_t size) { + size_t align_size; + if (size < 512*KiB) align_size = _mi_os_page_size(); + else if (size < 2*MiB) align_size = 64*KiB; + else if (size < 8*MiB) align_size = 256*KiB; + else if (size < 32*MiB) align_size = 1*MiB; + else align_size = 4*MiB; + if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? + return _mi_align_up(size, align_size); +} + +#if defined(_WIN32) +// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. +// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) +// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) +// We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. +#include +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +static PVirtualAlloc2 pVirtualAlloc2 = NULL; +static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; + +static bool mi_win_enable_large_os_pages() +{ + if (large_os_page_size > 0) return true; + + // Try to see if large OS pages are supported + // To use large pages on Windows, we first need access permission + // Set "Lock pages in memory" permission in the group policy editor + // + unsigned long err = 0; + HANDLE token = NULL; + BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (ok) { + TOKEN_PRIVILEGES tp; + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + if (ok) { + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + if (ok) { + err = GetLastError(); + ok = (err == ERROR_SUCCESS); + if (ok) { + large_os_page_size = GetLargePageMinimum(); + } + } + } + CloseHandle(token); + } + if (!ok) { + if (err == 0) err = GetLastError(); + _mi_warning_message("cannot enable large OS page support, error %lu\n", err); + } + return (ok!=0); +} + +void _mi_os_init(void) { + // get the page size + SYSTEM_INFO si; + GetSystemInfo(&si); + if (si.dwPageSize > 0) os_page_size = si.dwPageSize; + if (si.dwAllocationGranularity > 0) os_alloc_granularity = si.dwAllocationGranularity; + // get the VirtualAlloc2 function + HINSTANCE hDll; + hDll = LoadLibrary(TEXT("kernelbase.dll")); + if (hDll != NULL) { + // use VirtualAlloc2FromApp if possible as it is available to Windows store apps + pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + FreeLibrary(hDll); + } + hDll = LoadLibrary(TEXT("ntdll.dll")); + if (hDll != NULL) { + pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); + FreeLibrary(hDll); + } + if 
(mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + mi_win_enable_large_os_pages(); + } +} +#elif defined(__wasi__) +void _mi_os_init() { + os_page_size = 0x10000; // WebAssembly has a fixed page size: 64KB + os_alloc_granularity = 16; +} +#else +void _mi_os_init() { + // get the page size + long result = sysconf(_SC_PAGESIZE); + if (result > 0) { + os_page_size = (size_t)result; + os_alloc_granularity = os_page_size; + } + if (mi_option_is_enabled(mi_option_large_os_pages)) { + large_os_page_size = (1UL << 21); // 2MiB + } +} +#endif + + +/* ----------------------------------------------------------- + Raw allocation on Windows (VirtualAlloc) and Unix's (mmap). +----------------------------------------------------------- */ + +static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) +{ + if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true; + bool err = false; +#if defined(_WIN32) + err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); +#elif defined(__wasi__) + err = 0; // WebAssembly's heap cannot be shrunk +#else + err = (munmap(addr, size) == -1); +#endif + if (was_committed) _mi_stat_decrease(&stats->committed, size); + _mi_stat_decrease(&stats->reserved, size); + if (err) { +#pragma warning(suppress:4996) + _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size); + return false; + } + else { + return true; + } +} + +static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); + +#ifdef _WIN32 +static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */ + && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0 + && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0) + && pNtAllocateVirtualMemoryEx != NULL) + { + #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE + #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) + #endif + MEM_EXTENDED_PARAMETER param = { 0, 0 }; + param.Type = 5; // == MemExtendedParameterAttributeFlags; + param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + SIZE_T psize = size; + void* base = addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, ¶m, 1); + if (err == 0) { + return base; + } + else { + // else fall back to regular large OS pages + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err); + } + } +#endif +#if (MI_INTPTR_SIZE >= 8) + // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations + void* hint; + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { + return VirtualAlloc(hint, size, flags, PAGE_READWRITE); + } +#endif +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + // on modern Windows try use VirtualAlloc2 for aligned allocation + if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { + MEM_ADDRESS_REQUIREMENTS reqs = { 0 }; + reqs.Alignment = try_alignment; + MEM_EXTENDED_PARAMETER param = { 0 }; + param.Type = MemExtendedParameterAddressRequirements; + param.Pointer = &reqs; + return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); + } 
+#endif + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +} + +static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { + mi_assert_internal(!(large_only && !allow_large)); + static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; + void* p = NULL; + if ((large_only || use_large_os_page(size, try_alignment)) + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. + mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); + } + else { + // large OS pages must always reserve and commit. + *is_large = true; + p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES); + if (large_only) return p; + // fall back to non-large page allocation on error (`p == NULL`). + if (p == NULL) { + mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + } + } + } + if (p == NULL) { + *is_large = ((flags&MEM_LARGE_PAGES) != 0); + p = mi_win_virtual_allocx(addr, size, try_alignment, flags); + } + if (p == NULL) { + _mi_warning_message("unable to allocate memory: error code: %i, addr: %p, size: 0x%x, large only: %d, allow_large: %d\n", GetLastError(), addr, size, large_only, allow_large); + } + return p; +} + +#elif defined(__wasi__) +static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { + uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); + size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); + mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); + if (alloc_size < size) return NULL; + if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { + errno = ENOMEM; + return NULL; + } + return (void*)aligned_base; +} +#else +#define MI_OS_USE_MMAP +static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + void* p = NULL; + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations + void* hint; + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { + p = mmap(hint,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + } + #else + UNUSED(try_alignment); + #endif + if (p==NULL) { + p = mmap(addr,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; + } + return p; +} + +static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { + void* p = NULL; + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + int fd = -1; + #if defined(MAP_ALIGNED) // BSD + if (try_alignment > 0) { + size_t n = _mi_bsr(try_alignment); + if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB + flags |= MAP_ALIGNED(n); + } + } + #endif + #if defined(PROT_MAX) + protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + #if defined(VM_MAKE_TAG) + // macOS: tracking 
anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) os_tag = 100; + fd = VM_MAKE_TAG(os_tag); + #endif + if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { + static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; + uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // If the OS is not configured for large OS pages, or the user does not have + // enough permission, the `mmap` will always fail (but it might also fail for other reasons). + // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times + // to avoid too many failing calls to mmap. + mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); + } + else { + int lflags = flags; + int lfd = fd; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_1GB + if ((size % ((uintptr_t)1 << 30)) == 0) { + lflags |= MAP_HUGE_1GB; + } + else + #endif + { + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + } + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif + if (large_only || lflags != flags) { + // try large OS page allocation + *is_large = true; + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif + if (large_only) return p; + if (p == NULL) { + mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations + } + } + } + } + if (p == NULL) { + *is_large = false; + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); + #if defined(MADV_HUGEPAGE) + // Many Linux systems don't allow MAP_HUGETLB but they support instead + // transparent huge pages (TPH). It is not required to call `madvise` with MADV_HUGE + // though since properly aligned allocations will already use large pages if available + // in that case -- in particular for our large regions (in `memory.c`). + // However, some systems only allow TPH if called with explicit `madvise`, so + // when large OS pages are enabled for mimalloc, we call `madvice` anyways. + if (allow_large && use_large_os_page(size, try_alignment)) { + if (madvise(p, size, MADV_HUGEPAGE) == 0) { + *is_large = true; // possibly + }; + } + #endif + } + return p; +} +#endif + +// On 64-bit systems, we can do efficient aligned allocation by using +// the 4TiB to 30TiB area to allocate them. 
+#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) +static volatile _Atomic(intptr_t) aligned_base; + +// Return a 4MiB aligned address that is probably available +static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { + if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; + if ((size%MI_SEGMENT_SIZE) != 0) return NULL; + intptr_t hint = mi_atomic_add(&aligned_base, size); + if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + #endif + mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); + hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all + } + if (hint%try_alignment != 0) return NULL; + return (void*)hint; +} +#else +static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { + UNUSED(try_alignment); UNUSED(size); + return NULL; +} +#endif + + +// Primitive allocation from the OS. +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + if (size == 0) return NULL; + if (!commit) allow_large = false; + + void* p = NULL; + /* + if (commit && allow_large) { + p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); + if (p != NULL) { + *is_large = true; + return p; + } + } + */ + + #if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + #elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); + #else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + #endif + mi_stat_counter_increase(stats->mmap_calls, 1); + if (p != NULL) { + _mi_stat_increase(&stats->reserved, size); + if (commit) { _mi_stat_increase(&stats->committed, size); } + } + return p; +} + + +// Primitive aligned allocation from the OS. +// This function guarantees the allocated memory is aligned. 
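+// The approach (see the body): first allocate with the alignment passed as a
+// hint; if the returned pointer is not aligned, over-allocate by `alignment`
+// bytes and either re-allocate at an aligned address inside that range
+// (Windows, where only the original base pointer can be freed) or unmap the
+// unaligned head and tail around the aligned middle (mmap-based systems).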
+static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { + mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + if (!commit) allow_large = false; + if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; + size = _mi_align_up(size, _mi_os_page_size()); + + // try first with a hint (this will be aligned directly on Win 10+ or BSD) + void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); + if (p == NULL) return NULL; + + // if not aligned, free it, overallocate, and unmap around it + if (((uintptr_t)p % alignment != 0)) { + mi_os_mem_free(p, size, commit, stats); + if (size >= (SIZE_MAX - alignment)) return NULL; // overflow + size_t over_size = size + alignment; + +#if _WIN32 + // over-allocate and than re-allocate exactly at an aligned address in there. + // this may fail due to threads allocating at the same time so we + // retry this at most 3 times before giving up. + // (we can not decommit around the overallocation on Windows, because we can only + // free the original pointer, not one pointing inside the area) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + for (int tries = 0; tries < 3; tries++) { + // over-allocate to determine a virtual memory range + p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); + if (p == NULL) return NULL; // error + if (((uintptr_t)p % alignment) == 0) { + // if p happens to be aligned, just decommit the left-over area + _mi_os_decommit((uint8_t*)p + size, over_size - size, stats); + break; + } + else { + // otherwise free and allocate at an aligned address in there + mi_os_mem_free(p, over_size, commit, stats); + void* aligned_p = mi_align_up_ptr(p, alignment); + p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false, allow_large, is_large); + if (p == aligned_p) break; // success! + if (p != NULL) { // should not happen? + mi_os_mem_free(p, size, commit, stats); + p = NULL; + } + } + } +#else + // overallocate... + p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); + if (p == NULL) return NULL; + // and selectively unmap parts around the over-allocated area. 
+ void* aligned_p = mi_align_up_ptr(p, alignment); + size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; + size_t mid_size = _mi_align_up(size, _mi_os_page_size()); + size_t post_size = over_size - pre_size - mid_size; + mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); + if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); + if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + // we can return the aligned pointer on `mmap` systems + p = aligned_p; +#endif + } + + mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); + return p; +} + +/* ----------------------------------------------------------- + OS API: alloc, free, alloc_aligned +----------------------------------------------------------- */ + +void* _mi_os_alloc(size_t size, mi_stats_t* stats) { + if (size == 0) return NULL; + size = _mi_os_good_alloc_size(size); + bool is_large = false; + return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); +} + +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats) { + if (size == 0 || p == NULL) return; + size = _mi_os_good_alloc_size(size); + mi_os_mem_free(p, size, was_committed, stats); +} + +void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { + _mi_os_free_ex(p, size, true, stats); +} + +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld) +{ + if (size == 0) return NULL; + size = _mi_os_good_alloc_size(size); + alignment = _mi_align_up(alignment, _mi_os_page_size()); + bool allow_large = false; + if (large != NULL) { + allow_large = *large; + *large = false; + } + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); +} + + + +/* ----------------------------------------------------------- + OS memory API: reset, commit, decommit, protect, unprotect. +----------------------------------------------------------- */ + + +// OS page align within a given area, either conservative (pages inside the area only), +// or not (straddling pages outside the area is possible) +static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { + mi_assert(addr != NULL && size > 0); + if (newsize != NULL) *newsize = 0; + if (size == 0 || addr == NULL) return NULL; + + // page align conservatively within the range + void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size()) + : mi_align_down_ptr(addr, _mi_os_page_size())); + void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size()) + : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size())); + ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start; + if (diff <= 0) return NULL; + + mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size)); + if (newsize != NULL) *newsize = (size_t)diff; + return start; +} + +static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) { + return mi_os_page_align_areax(true, addr, size, newsize); +} + +// Commit/Decommit memory. +// Usuelly commit is aligned liberal, while decommit is aligned conservative. 
+// (but not for the reset version where we want commit to be conservative as well) +static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { + // page align in the range, commit liberally, decommit conservative + *is_zero = false; + size_t csize; + void* start = mi_os_page_align_areax(conservative, addr, size, &csize); + if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + int err = 0; + if (commit) { + _mi_stat_increase(&stats->committed, csize); + _mi_stat_counter_increase(&stats->commit_calls, 1); + } + else { + _mi_stat_decrease(&stats->committed, csize); + } + + #if defined(_WIN32) + if (commit) { + // if the memory was already committed, the call succeeds but it is not zero'd + // *is_zero = true; + void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); + err = (p == start ? 0 : GetLastError()); + } + else { + BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); + err = (ok ? 0 : GetLastError()); + } + #elif defined(__wasi__) + // WebAssembly guests can't control memory protection + #else + err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); + if (err != 0) { err = errno; } + #endif + if (err != 0) { + _mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + } + mi_assert_internal(err == 0); + return (err == 0); +} + +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats); +} + +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { + bool is_zero; + return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); +} + +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, true /* conservative? */, is_zero, stats); +} + + +// Signal to the OS that the address range is no longer in use +// but may be used later again. This will release physical memory +// pages and reduce swapping while keeping the memory committed. +// We page align to a conservative area inside the range to reset. +static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) { + // page align conservatively within the range + size_t csize; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (reset) _mi_stat_increase(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); + if (!reset) return true; // nothing to do on unreset! 
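+
+  // What follows is platform specific: Windows uses MEM_RESET (plus a
+  // VirtualUnlock to drop the pages from the working set), while the Unix
+  // path prefers madvise(MADV_FREE) and falls back to MADV_DONTNEED when
+  // MADV_FREE is not supported.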
+ + #if (MI_DEBUG>1) + if (MI_SECURE==0) { + memset(start, 0, csize); // pretend it is eagerly reset + } + #endif + +#if defined(_WIN32) + // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory + void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); + mi_assert_internal(p == start); + #if 1 + if (p == start && start != NULL) { + VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + } + #endif + if (p != start) return false; +#else +#if defined(MADV_FREE) + static int advice = MADV_FREE; + int err = madvise(start, csize, advice); + if (err != 0 && errno == EINVAL && advice == MADV_FREE) { + // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on + advice = MADV_DONTNEED; + err = madvise(start, csize, advice); + } +#elif defined(__wasi__) + int err = 0; +#else + int err = madvise(start, csize, MADV_DONTNEED); +#endif + if (err != 0) { + _mi_warning_message("madvise reset error: start: 0x%p, csize: 0x%x, errno: %i\n", start, csize, errno); + } + //mi_assert(err == 0); + if (err != 0) return false; +#endif + return true; +} + +// Signal to the OS that the address range is no longer in use +// but may be used later again. This will release physical memory +// pages and reduce swapping while keeping the memory committed. +// We page align to a conservative area inside the range to reset. +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_decommit(addr,size,stats); + } + else { + return mi_os_resetx(addr, size, true, stats); + } +} + +bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) + } + else { + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); + } +} + + +// Protect a region in memory to be not accessible. +static bool mi_os_protectx(void* addr, size_t size, bool protect) { + // page align conservatively within the range + size_t csize = 0; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return false; + if (_mi_os_is_huge_reserved(addr)) { + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + } + int err = 0; +#ifdef _WIN32 + DWORD oldprotect = 0; + BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); + err = (ok ? 0 : GetLastError()); +#elif defined(__wasi__) + err = 0; +#else + err = mprotect(start, csize, protect ? 
PROT_NONE : (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } +#endif + if (err != 0) { + _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + } + return (err == 0); +} + +bool _mi_os_protect(void* addr, size_t size) { + return mi_os_protectx(addr, size, true); +} + +bool _mi_os_unprotect(void* addr, size_t size) { + return mi_os_protectx(addr, size, false); +} + + + +bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { + // page align conservatively within the range + mi_assert_internal(oldsize > newsize && p != NULL); + if (oldsize < newsize || p == NULL) return false; + if (oldsize == newsize) return true; + + // oldsize and newsize should be page aligned or we cannot shrink precisely + void* addr = (uint8_t*)p + newsize; + size_t size = 0; + void* start = mi_os_page_align_area_conservative(addr, oldsize - newsize, &size); + if (size == 0 || start != addr) return false; + +#ifdef _WIN32 + // we cannot shrink on windows, but we can decommit + return _mi_os_decommit(start, size, stats); +#else + return mi_os_mem_free(start, size, true, stats); +#endif +} + + +/* ---------------------------------------------------------------------------- +Support for huge OS pages (1Gib) that are reserved up-front and never +released. Only regions are allocated in here (see `memory.c`) so the memory +will be reused. +-----------------------------------------------------------------------------*/ +#define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB + +typedef struct mi_huge_info_s { + volatile _Atomic(void*) start; // start of huge page area (32TiB) + volatile _Atomic(size_t) reserved; // total reserved size + volatile _Atomic(size_t) used; // currently allocated +} mi_huge_info_t; + +static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) }; + +bool _mi_os_is_huge_reserved(void* p) { + return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL && + p >= mi_atomic_read_ptr(&os_huge_reserved.start) && + (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved)); +} + +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment) +{ + // only allow large aligned allocations (e.g. regions) + if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL; + if (try_alignment > MI_SEGMENT_SIZE) return NULL; + if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL; + if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full + + // always aligned + mi_assert_internal(mi_atomic_read(&os_huge_reserved.used) % MI_SEGMENT_SIZE == 0 ); + mi_assert_internal( (uintptr_t)mi_atomic_read_ptr(&os_huge_reserved.start) % MI_SEGMENT_SIZE == 0 ); + + // try to reserve space + size_t base = mi_atomic_addu( &os_huge_reserved.used, size ); + if ((base + size) > os_huge_reserved.reserved) { + // "free" our over-allocation + mi_atomic_subu( &os_huge_reserved.used, size); + return NULL; + } + + // success! 
+ uint8_t* p = (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + base; + mi_assert_internal( (uintptr_t)p % MI_SEGMENT_SIZE == 0 ); + return p; +} + +/* +static void mi_os_free_huge_reserved() { + uint8_t* addr = os_huge_reserved.start; + size_t total = os_huge_reserved.reserved; + os_huge_reserved.reserved = 0; + os_huge_reserved.start = NULL; + for( size_t current = 0; current < total; current += MI_HUGE_OS_PAGE_SIZE) { + _mi_os_free(addr + current, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + } +} +*/ + +#if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + UNUSED(pages); UNUSED(max_secs); + if (pages_reserved != NULL) *pages_reserved = 0; + return ENOMEM; +} +#else +int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +{ + if (pages_reserved != NULL) *pages_reserved = 0; + if (max_secs==0) return ETIMEDOUT; // timeout + if (pages==0) return 0; // ok + if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return ETIMEDOUT; // already reserved + + // Set the start address after the 32TiB area + uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + #endif + + // Allocate one page at the time but try to place them contiguously + // We allocate one page at the time to be able to abort if it takes too long + double start_t = _mi_clock_start(); + uint8_t* addr = start; // current top of the allocations + for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) { + // allocate a page + void* p = NULL; + bool is_large = true; + #ifdef _WIN32 + if (page==0) { mi_win_enable_large_os_pages(); } + p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large); + #elif defined(MI_OS_USE_MMAP) + p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large); + #else + // always fail + #endif + + // Did we succeed at a contiguous address? 
+ if (p != addr) { + // no success, issue a warning and return with an error + if (p != NULL) { + _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); + _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main ); + } + else { + #ifdef _WIN32 + int err = GetLastError(); + #else + int err = errno; + #endif + _mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err); + } + return ENOMEM; + } + // success, record it + if (page==0) { + mi_atomic_write_ptr(&os_huge_reserved.start, addr); // don't switch the order of these writes + mi_atomic_write(&os_huge_reserved.reserved, MI_HUGE_OS_PAGE_SIZE); + } + else { + mi_atomic_addu(&os_huge_reserved.reserved,MI_HUGE_OS_PAGE_SIZE); + } + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); + if (pages_reserved != NULL) { *pages_reserved = page + 1; } + + // check for timeout + double elapsed = _mi_clock_end(start_t); + if (elapsed > max_secs) return ETIMEDOUT; + if (page >= 1) { + double estimate = ((elapsed / (double)(page+1)) * (double)pages); + if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout + } + } + _mi_verbose_message("reserved %zu huge pages\n", pages); + return 0; +} +#endif + diff --git a/runtime/src/mimalloc/c/page-queue.c b/runtime/src/mimalloc/c/page-queue.c new file mode 100644 index 00000000000..95443a69bb5 --- /dev/null +++ b/runtime/src/mimalloc/c/page-queue.c @@ -0,0 +1,361 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + Definition of page queues for each block size +----------------------------------------------------------- */ + +#ifndef MI_IN_PAGE_C +#error "this file should be included from 'page.c'" +#endif + +/* ----------------------------------------------------------- + Minimal alignment in machine words (i.e. 
`sizeof(void*)`) +----------------------------------------------------------- */ + +#if (MI_MAX_ALIGN_SIZE > 4*MI_INTPTR_SIZE) + #error "define alignment for more than 4x word size for this platform" +#elif (MI_MAX_ALIGN_SIZE > 2*MI_INTPTR_SIZE) + #define MI_ALIGN4W // 4 machine words minimal alignment +#elif (MI_MAX_ALIGN_SIZE > MI_INTPTR_SIZE) + #define MI_ALIGN2W // 2 machine words minimal alignment +#else + // ok, default alignment is 1 word +#endif + + +/* ----------------------------------------------------------- + Queue query +----------------------------------------------------------- */ + + +static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { + return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+sizeof(uintptr_t))); +} + +static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { + return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+(2*sizeof(uintptr_t)))); +} + +static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { + return (pq->block_size > MI_LARGE_OBJ_SIZE_MAX); +} + +/* ----------------------------------------------------------- + Bins +----------------------------------------------------------- */ + +// Bit scan reverse: return the index of the highest bit. +static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return (uint8_t)idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; +#if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); +#elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); +#else +# error "define bsr for non-32 or 64-bit platforms" +#endif +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +extern inline uint8_t _mi_bin(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + #if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } + #endif + else if (wsize > MI_LARGE_OBJ_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + #if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes + #endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
+ // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + mi_assert_internal(bin < MI_BIN_HUGE); + } + mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE); + return bin; +} + + + +/* ----------------------------------------------------------- + Queue of pages with free blocks +----------------------------------------------------------- */ + +size_t _mi_bin_size(uint8_t bin) { + return _mi_heap_empty.pages[bin].block_size; +} + +// Good size for allocation +size_t mi_good_size(size_t size) mi_attr_noexcept { + if (size <= MI_LARGE_OBJ_SIZE_MAX) { + return _mi_bin_size(_mi_bin(size)); + } + else { + return _mi_align_up(size,_mi_os_page_size()); + } +} + +#if (MI_DEBUG>1) +static bool mi_page_queue_contains(mi_page_queue_t* queue, const mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_page_t* list = queue->first; + while (list != NULL) { + mi_assert_internal(list->next == NULL || list->next->prev == list); + mi_assert_internal(list->prev == NULL || list->prev->next == list); + if (list == page) break; + list = list->next; + } + return (list == page); +} + +#endif + +#if (MI_DEBUG>1) +static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* pq) { + return (pq >= &heap->pages[0] && pq <= &heap->pages[MI_BIN_FULL]); +} +#endif + +static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); + mi_heap_t* heap = page->heap; + mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); + mi_page_queue_t* pq = &heap->pages[bin]; + mi_assert_internal(bin >= MI_BIN_HUGE || page->block_size == pq->block_size); + mi_assert_expensive(mi_page_queue_contains(pq, page)); + return pq; +} + +static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); + mi_assert_internal(bin <= MI_BIN_FULL); + mi_page_queue_t* pq = &heap->pages[bin]; + mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size); + return pq; +} + +// The current small page array is for efficiency and for each +// small size (up to 256) it points directly to the page for that +// size without having to compute the bin. This means when the +// current free page queue is updated for a small bin, we need to update a +// range of entries in `_mi_page_small_free`. 
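+// Example (illustrative, 64-bit): a 64-byte block is wsize 8; because of the
+// minimal alignment several adjacent word sizes share that bin, so the loop
+// below repoints the whole run of `pages_free_direct` slots between the
+// previous bin's size and the current one. Small allocations can then index
+// the array directly without recomputing the bin.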
+static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_queue_t* pq) { + mi_assert_internal(mi_heap_contains_queue(heap,pq)); + size_t size = pq->block_size; + if (size > MI_SMALL_SIZE_MAX) return; + + mi_page_t* page = pq->first; + if (pq->first == NULL) page = (mi_page_t*)&_mi_page_empty; + + // find index in the right direct page array + size_t start; + size_t idx = _mi_wsize_from_size(size); + mi_page_t** pages_free = heap->pages_free_direct; + + if (pages_free[idx] == page) return; // already set + + // find start slot + if (idx<=1) { + start = 0; + } + else { + // find previous size; due to minimal alignment upto 3 previous bins may need to be skipped + uint8_t bin = _mi_bin(size); + const mi_page_queue_t* prev = pq - 1; + while( bin == _mi_bin(prev->block_size) && prev > &heap->pages[0]) { + prev--; + } + start = 1 + _mi_wsize_from_size(prev->block_size); + if (start > idx) start = idx; + } + + // set size range to the right page + mi_assert(start <= idx); + for (size_t sz = start; sz <= idx; sz++) { + pages_free[sz] = page; + } +} + +/* +static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { + return (queue->first == NULL); +} +*/ + +static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(mi_page_queue_contains(queue, page)); + mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == queue->last) queue->last = page->prev; + if (page == queue->first) { + queue->first = page->next; + // update first + mi_heap_t* heap = page->heap; + mi_assert_internal(mi_heap_contains_queue(heap, queue)); + mi_heap_queue_first_update(heap,queue); + } + page->heap->page_count--; + page->next = NULL; + page->prev = NULL; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_page_set_in_full(page,false); +} + + +static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(page->heap == NULL); + mi_assert_internal(!mi_page_queue_contains(queue, page)); + mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_assert_internal(page->block_size == queue->block_size || + (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + + mi_page_set_in_full(page, mi_page_queue_is_full(queue)); + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + page->next = queue->first; + page->prev = NULL; + if (queue->first != NULL) { + mi_assert_internal(queue->first->prev == NULL); + queue->first->prev = page; + queue->first = page; + } + else { + queue->first = queue->last = page; + } + + // update direct + mi_heap_queue_first_update(heap, queue); + heap->page_count++; +} + + +static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(mi_page_queue_contains(from, page)); + mi_assert_expensive(!mi_page_queue_contains(to, page)); + mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || + (page->block_size == to->block_size && mi_page_queue_is_full(from)) || + (page->block_size == from->block_size && mi_page_queue_is_full(to)) || + 
(page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == from->last) from->last = page->prev; + if (page == from->first) { + from->first = page->next; + // update first + mi_heap_t* heap = page->heap; + mi_assert_internal(mi_heap_contains_queue(heap, from)); + mi_heap_queue_first_update(heap, from); + } + + page->prev = to->last; + page->next = NULL; + if (to->last != NULL) { + mi_assert_internal(page->heap == to->last->heap); + to->last->next = page; + to->last = page; + } + else { + to->first = page; + to->last = page; + mi_heap_queue_first_update(page->heap, to); + } + + mi_page_set_in_full(page, mi_page_queue_is_full(to)); +} + +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) { + mi_assert_internal(mi_heap_contains_queue(heap,pq)); + mi_assert_internal(pq->block_size == append->block_size); + + if (append->first==NULL) return 0; + + // set append pages to new heap and count + size_t count = 0; + for (mi_page_t* page = append->first; page != NULL; page = page->next) { + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + count++; + } + + if (pq->last==NULL) { + // take over afresh + mi_assert_internal(pq->first==NULL); + pq->first = append->first; + pq->last = append->last; + mi_heap_queue_first_update(heap, pq); + } + else { + // append to end + mi_assert_internal(pq->last!=NULL); + mi_assert_internal(append->first!=NULL); + pq->last->next = append->first; + append->first->prev = pq->last; + pq->last = append->last; + } + return count; +} diff --git a/runtime/src/mimalloc/c/page.c b/runtime/src/mimalloc/c/page.c new file mode 100644 index 00000000000..437cd0a57a1 --- /dev/null +++ b/runtime/src/mimalloc/c/page.c @@ -0,0 +1,818 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + The core of the allocator. Every segment contains + pages of a certain block size. The main function + exported is `mi_malloc_generic`. 
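+
+  Each page keeps three free lists: `free` (handed out to the owning
+  thread), `local_free` (blocks freed by the owner while `free` is being
+  consumed) and an atomic `thread_free` list for frees coming from other
+  threads; `_mi_page_free_collect` below merges them back into `free`.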
+----------------------------------------------------------- */ + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +/* ----------------------------------------------------------- + Definition of page queues for each block size +----------------------------------------------------------- */ + +#define MI_IN_PAGE_C +#include "page-queue.c" +#undef MI_IN_PAGE_C + + +/* ----------------------------------------------------------- + Page helpers +----------------------------------------------------------- */ + +// Index a block in a page +static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t i) { + mi_assert_internal(page != NULL); + mi_assert_internal(i <= page->reserved); + return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); +} + +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); + + +#if (MI_DEBUG>1) +static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { + size_t count = 0; + while (head != NULL) { + mi_assert_internal(page == _mi_ptr_page(head)); + count++; + head = mi_block_next(page, head); + } + return count; +} + +/* +// Start of the page available memory +static inline uint8_t* mi_page_area(const mi_page_t* page) { + return _mi_page_start(_mi_page_segment(page), page, NULL); +} +*/ + +static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { + size_t psize; + uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize); + mi_block_t* start = (mi_block_t*)page_area; + mi_block_t* end = (mi_block_t*)(page_area + psize); + while(p != NULL) { + if (p < start || p >= end) return false; + p = mi_block_next(page, p); + } + return true; +} + +static bool mi_page_is_valid_init(mi_page_t* page) { + mi_assert_internal(page->block_size > 0); + mi_assert_internal(page->used <= page->capacity); + mi_assert_internal(page->capacity <= page->reserved); + + mi_segment_t* segment = _mi_page_segment(page); + uint8_t* start = _mi_page_start(segment,page,NULL); + mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + //mi_assert_internal(start + page->capacity*page->block_size == page->top); + + mi_assert_internal(mi_page_list_is_valid(page,page->free)); + mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); + + #if MI_DEBUG>3 // generally too expensive to check this + if (page->flags.is_zero) { + for(mi_block_t* block = page->free; block != NULL; mi_block_next(page,block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, page->block_size - sizeof(mi_block_t))); + } + } + #endif + + mi_block_t* tfree = mi_tf_block(page->thread_free); + mi_assert_internal(mi_page_list_is_valid(page, tfree)); + size_t tfree_count = mi_page_list_count(page, tfree); + mi_assert_internal(tfree_count <= page->thread_freed + 1); + + size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); + mi_assert_internal(page->used + free_count == page->capacity); + + return true; +} + +bool _mi_page_is_valid(mi_page_t* page) { + mi_assert_internal(mi_page_is_valid_init(page)); + #if MI_SECURE + mi_assert_internal(page->cookie != 0); + #endif + if (page->heap!=NULL) { + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0); + if (segment->page_kind != MI_PAGE_HUGE) { + mi_page_queue_t* pq = mi_page_queue_of(page); + mi_assert_internal(mi_page_queue_contains(pq, 
page)); + mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); + } + } + return true; +} +#endif + + +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { + mi_thread_free_t tfree; + mi_thread_free_t tfreex; + + do { + tfreex = tfree = page->thread_free; + if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) { + tfreex = mi_tf_set_delayed(tfree,delay); + } + else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { + mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + continue; // and try again + } + } + while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal + !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); +} + + +/* ----------------------------------------------------------- + Page collect the `local_free` and `thread_free` lists +----------------------------------------------------------- */ + +// Collect the local `thread_free` list using an atomic exchange. +// Note: The exchange must be done atomically as this is used right after +// moving to the full list in `mi_page_collect_ex` and we need to +// ensure that there was no race where the page became unfull just before the move. +static void _mi_page_thread_free_collect(mi_page_t* page) +{ + mi_block_t* head; + mi_thread_free_t tfree; + mi_thread_free_t tfreex; + do { + tfree = page->thread_free; + head = mi_tf_block(tfree); + tfreex = mi_tf_set_block(tfree,NULL); + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + + // return if the list is empty + if (head == NULL) return; + + // find the tail -- also to get a proper count (without data races) + uintptr_t max_count = page->capacity; // cannot collect more than capacity + uintptr_t count = 1; + mi_block_t* tail = head; + mi_block_t* next; + while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { + count++; + tail = next; + } + // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) + if (count > max_count) { + _mi_fatal_error("corrupted thread-free list\n"); + return; // the thread-free items cannot be freed + } + + // and append the current local free list + mi_block_set_next(page,tail, page->local_free); + page->local_free = head; + + // update counts now + mi_atomic_subu(&page->thread_freed, count); + page->used -= count; +} + +void _mi_page_free_collect(mi_page_t* page, bool force) { + mi_assert_internal(page!=NULL); + + // collect the thread free list + if (force || mi_tf_block(page->thread_free) != NULL) { // quick test to avoid an atomic operation + _mi_page_thread_free_collect(page); + } + + // and the local free list + if (page->local_free != NULL) { + if (mi_likely(page->free == NULL)) { + // usual case + page->free = page->local_free; + page->local_free = NULL; + page->is_zero = false; + } + else if (force) { + // append -- only on shutdown (force) as this is a linear operation + mi_block_t* tail = page->local_free; + mi_block_t* next; + while ((next = mi_block_next(page, tail)) != NULL) { + tail = next; + } + mi_block_set_next(page, tail, page->free); + page->free = page->local_free; + page->local_free = NULL; + page->is_zero = false; + } + } + + mi_assert_internal(!force || page->local_free == NULL); +} + + + +/* ----------------------------------------------------------- + Page 
fresh and retire +----------------------------------------------------------- */ + +// called from segments when reclaiming abandoned pages +void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { + mi_assert_expensive(mi_page_is_valid_init(page)); + mi_assert_internal(page->heap == NULL); + mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + _mi_page_free_collect(page,false); + mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); + mi_page_queue_push(heap, pq, page); + mi_assert_expensive(_mi_page_is_valid(page)); +} + +// allocate a fresh page from a segment +static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { + mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); + mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); + if (page == NULL) return NULL; + mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_page_init(heap, page, block_size, &heap->tld->stats); + _mi_stat_increase( &heap->tld->stats.pages, 1); + if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL + mi_assert_expensive(_mi_page_is_valid(page)); + return page; +} + +// Get a fresh page to use +static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { + mi_assert_internal(mi_heap_contains_queue(heap, pq)); + + // try to reclaim an abandoned page first + mi_page_t* page = pq->first; + if (!heap->no_reclaim && + _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments) && + page != pq->first) + { + // we reclaimed, and we got lucky with a reclaimed page in our queue + page = pq->first; + if (page->free != NULL) return page; + } + // otherwise allocate the page + page = mi_page_fresh_alloc(heap, pq, pq->block_size); + if (page==NULL) return NULL; + mi_assert_internal(pq->block_size==page->block_size); + mi_assert_internal(pq==mi_page_queue(heap,page->block_size)); + return page; +} + +/* ----------------------------------------------------------- + Do any delayed frees + (put there by other threads if they deallocated in a full page) +----------------------------------------------------------- */ +void _mi_heap_delayed_free(mi_heap_t* heap) { + // take over the list + mi_block_t* block; + do { + block = (mi_block_t*)heap->thread_delayed_free; + } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), NULL, block)); + + // and free them all + while(block != NULL) { + mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); + // use internal free instead of regular one to keep stats etc correct + if (!_mi_free_delayed_block(block)) { + // we might already start delayed freeing while another thread has not yet + // reset the delayed_freeing flag; in that case delay it further by reinserting. 
+ mi_block_t* dfree; + do { + dfree = (mi_block_t*)heap->thread_delayed_free; + mi_block_set_nextx(heap, block, dfree, heap->cookie); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); + + } + block = next; + } +} + +/* ----------------------------------------------------------- + Unfull, abandon, free and retire +----------------------------------------------------------- */ + +// Move a page from the full list back to a regular list +void _mi_page_unfull(mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(mi_page_is_in_full(page)); + + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE); + if (!mi_page_is_in_full(page)) return; + + mi_heap_t* heap = page->heap; + mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; + mi_page_set_in_full(page, false); // to get the right queue + mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); + mi_page_set_in_full(page, true); + mi_page_queue_enqueue_from(pq, pqfull, page); +} + +static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { + mi_assert_internal(pq == mi_page_queue_of(page)); + mi_assert_internal(!mi_page_immediate_available(page)); + mi_assert_internal(!mi_page_is_in_full(page)); + + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE); + if (mi_page_is_in_full(page)) return; + + mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); + _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set +} + + +// Abandon a page with used blocks at the end of a thread. +// Note: only call if it is ensured that no references exist from +// the `page->heap->thread_delayed_free` into this page. +// Currently only called through `mi_heap_collect_ex` which ensures this. +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(pq == mi_page_queue_of(page)); + mi_assert_internal(page->heap != NULL); + +#if MI_DEBUG > 1 + mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); +#endif + + // remove from our page list + mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_page_queue_remove(pq, page); + + // page is no longer associated with our heap + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + +#if MI_DEBUG>1 + // check there are no references left.. 
+ for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { + mi_assert_internal(_mi_ptr_page(block) != page); + } +#endif + + // and abandon it + mi_assert_internal(page->heap == NULL); + _mi_segment_page_abandon(page,segments_tld); +} + + +// Free a page with no more free blocks +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(pq == mi_page_queue_of(page)); + mi_assert_internal(mi_page_all_free(page)); + #if MI_DEBUG>1 + // check if we can safely free + mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE); + free = mi_atomic_exchange(&page->thread_free, free); + mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING); + #endif + + mi_page_set_has_aligned(page, false); + + // account for huge pages here + // (note: no longer necessary as huge pages are always abandoned) + if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { + if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); + } + else { + _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); + } + } + + // remove from the page list + // (no need to do _mi_heap_delayed_free first as all blocks are already free) + mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_page_queue_remove(pq, page); + + // and free it + mi_assert_internal(page->heap == NULL); + _mi_segment_page_free(page, force, segments_tld); +} + +// Retire a page with no more used blocks +// Important to not retire too quickly though as new +// allocations might coming. +// Note: called from `mi_free` and benchmarks often +// trigger this due to freeing everything and then +// allocating again so careful when changing this. +void _mi_page_retire(mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(mi_page_all_free(page)); + + mi_page_set_has_aligned(page, false); + + // don't retire too often.. + // (or we end up retiring and re-allocating most of the time) + // NOTE: refine this more: we should not retire if this + // is the only page left with free blocks. It is not clear + // how to check this efficiently though... + // for now, we don't retire if it is the only page left of this size class. + mi_page_queue_t* pq = mi_page_queue_of(page); + if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) { + // if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { + if (pq->last==page && pq->first==page) { + mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); + return; // dont't retire after all + } + } + + _mi_page_free(page, pq, false); +} + + +/* ----------------------------------------------------------- + Initialize the initial free list in a page. + In secure mode we initialize a randomized list by + alternating between slices. 
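+  (For intuition: with MI_MAX_SLICE_SHIFT = 6 an extend of 64 blocks becomes
+  64 slices of one block each, i.e. an essentially shuffled order, while
+  larger extends keep 64 slices with several consecutive blocks per slice so
+  the shuffling cost stays bounded.)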
+----------------------------------------------------------- */ + +#define MI_MAX_SLICE_SHIFT (6) // at most 64 slices +#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) +#define MI_MIN_SLICES (2) + +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { + UNUSED(stats); + #if (MI_SECURE<=2) + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->local_free == NULL); + #endif + mi_assert_internal(page->capacity + extend <= page->reserved); + void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); + const size_t bsize = page->block_size; + + // initialize a randomized free list + // set up `slice_count` slices to alternate between + size_t shift = MI_MAX_SLICE_SHIFT; + while ((extend >> shift) == 0) { + shift--; + } + const size_t slice_count = (size_t)1U << shift; + const size_t slice_extend = extend / slice_count; + mi_assert_internal(slice_extend >= 1); + mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice + size_t counts[MI_MAX_SLICES]; // available objects in the slice + for (size_t i = 0; i < slice_count; i++) { + blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + counts[i] = slice_extend; + } + counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) + + // and initialize the free list by randomly threading through them + // set up first element + size_t current = _mi_heap_random(heap) % slice_count; + counts[current]--; + mi_block_t* const free_start = blocks[current]; + // and iterate through the rest + uintptr_t rnd = heap->random; + for (size_t i = 1; i < extend; i++) { + // call random_shuffle only every INTPTR_SIZE rounds + const size_t round = i%MI_INTPTR_SIZE; + if (round == 0) rnd = _mi_random_shuffle(rnd); + // select a random next slice index + size_t next = ((rnd >> 8*round) & (slice_count-1)); + while (counts[next]==0) { // ensure it still has space + next++; + if (next==slice_count) next = 0; + } + // and link the current block to it + counts[next]--; + mi_block_t* const block = blocks[current]; + blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block + mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` + current = next; + } + // prepend to the free list (usually NULL) + mi_block_set_next(page, blocks[current], page->free); // end of the list + page->free = free_start; + heap->random = _mi_random_shuffle(rnd); +} + +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) +{ + UNUSED(stats); + #if (MI_SECURE <= 2) + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->local_free == NULL); + #endif + mi_assert_internal(page->capacity + extend <= page->reserved); + void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); + const size_t bsize = page->block_size; + mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); + + // initialize a sequential free list + mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* block = start; + while(block <= last) { + mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); + mi_block_set_next(page,block,next); + block = next; + } + // prepend to free list (usually `NULL`) + mi_block_set_next(page, last, page->free); + page->free = start; +} + +/* 
----------------------------------------------------------- + Page initialize and extend the capacity +----------------------------------------------------------- */ + +#define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well. +#if (MI_SECURE>0) +#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many +#else +#define MI_MIN_EXTEND (1) +#endif + +// Extend the capacity (up to reserved) by initializing a free list +// We do at most `MI_MAX_EXTEND` to avoid touching too much memory +// Note: we also experimented with "bump" allocation on the first +// allocations but this did not speed up any benchmark (due to an +// extra test in malloc? or cache effects?) +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) { + UNUSED(stats); + mi_assert_expensive(mi_page_is_valid_init(page)); + #if (MI_SECURE<=2) + mi_assert(page->free == NULL); + mi_assert(page->local_free == NULL); + if (page->free != NULL) return; + #endif + if (page->capacity >= page->reserved) return; + + size_t page_size; + _mi_page_start(_mi_page_segment(page), page, &page_size); + mi_stat_counter_increase(stats->pages_extended, 1); + + // calculate the extend count + size_t extend = page->reserved - page->capacity; + size_t max_extend = (page->block_size >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)page->block_size); + if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; + + if (extend > max_extend) { + // ensure we don't touch memory beyond the page to reduce page commit. + // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%. + extend = (max_extend==0 ? 1 : max_extend); + } + + mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); + mi_assert_internal(extend < (1UL<<16)); + + // and append the extend the free list + if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { + mi_page_free_list_extend(page, extend, stats ); + } + else { + mi_page_free_list_extend_secure(heap, page, extend, stats); + } + // enable the new free list + page->capacity += (uint16_t)extend; + mi_stat_increase(stats->page_committed, extend * page->block_size); + + // extension into zero initialized memory preserves the zero'd free list + if (!page->is_zero_init) { + page->is_zero = false; + } + mi_assert_expensive(mi_page_is_valid_init(page)); +} + +// Initialize a fresh page +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_stats_t* stats) { + mi_assert(page != NULL); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert(segment != NULL); + mi_assert_internal(block_size > 0); + // set fields + size_t page_size; + _mi_segment_page_start(segment, page, block_size, &page_size); + page->block_size = block_size; + mi_assert_internal(page_size / block_size < (1L<<16)); + page->reserved = (uint16_t)(page_size / block_size); + #ifdef MI_ENCODE_FREELIST + page->cookie = _mi_heap_random(heap) | 1; + #endif + page->is_zero = page->is_zero_init; + + mi_assert_internal(page->capacity == 0); + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->used == 0); + mi_assert_internal(page->thread_free == 0); + mi_assert_internal(page->thread_freed == 0); + mi_assert_internal(page->next == NULL); + mi_assert_internal(page->prev == NULL); + mi_assert_internal(!mi_page_has_aligned(page)); + #if (MI_ENCODE_FREELIST) + mi_assert_internal(page->cookie != 0); + #endif + mi_assert_expensive(mi_page_is_valid_init(page)); + + // initialize an initial free 
list + mi_page_extend_free(heap,page,stats); + mi_assert(mi_page_immediate_available(page)); +} + + +/* ----------------------------------------------------------- + Find pages with free blocks +-------------------------------------------------------------*/ + +// Find a page with free blocks of `page->block_size`. +static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) +{ + // search through the pages in "next fit" order + mi_page_t* rpage = NULL; + size_t count = 0; + size_t page_free_count = 0; + mi_page_t* page = pq->first; + while( page != NULL) + { + mi_page_t* next = page->next; // remember next + count++; + + // 0. collect freed blocks by us and other threads + _mi_page_free_collect(page,false); + + // 1. if the page contains free blocks, we are done + if (mi_page_immediate_available(page)) { + // If all blocks are free, we might retire this page instead. + // do this at most 8 times to bound allocation time. + // (note: this can happen if a page was earlier not retired due + // to having neighbours that were mostly full or due to concurrent frees) + if (page_free_count < 8 && mi_page_all_free(page)) { + page_free_count++; + if (rpage != NULL) _mi_page_free(rpage,pq,false); + rpage = page; + page = next; + continue; // and keep looking + } + else { + break; // pick this one + } + } + + // 2. Try to extend + if (page->capacity < page->reserved) { + mi_page_extend_free(heap, page, &heap->tld->stats); + mi_assert_internal(mi_page_immediate_available(page)); + break; + } + + // 3. If the page is completely full, move it to the `mi_pages_full` + // queue so we don't visit long-lived pages too often. + mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); + mi_page_to_full(page,pq); + + page = next; + } // for each page + + mi_stat_counter_increase(heap->tld->stats.searches,count); + + if (page == NULL) { + page = rpage; + rpage = NULL; + } + if (rpage != NULL) { + _mi_page_free(rpage,pq,false); + } + + if (page == NULL) { + page = mi_page_fresh(heap, pq); + } + else { + mi_assert(pq->first == page); + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + return page; +} + + +// Find a page with free blocks of `size`. +static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { + mi_page_queue_t* pq = mi_page_queue(heap,size); + mi_page_t* page = pq->first; + if (page != NULL) { + if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { + // in secure mode, we extend half the time to increase randomness + mi_page_extend_free(heap, page, &heap->tld->stats); + mi_assert_internal(mi_page_immediate_available(page)); + } + else { + _mi_page_free_collect(page,false); + } + if (mi_page_immediate_available(page)) { + return page; // fast path + } + } + return mi_page_queue_find_free_ex(heap, pq); +} + + +/* ----------------------------------------------------------- + Users can register a deferred free function called + when the `free` list is empty. Since the `local_free` + is separate this is deterministically called after + a certain number of allocations. 
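+  (A typical use, sketched: register a callback with
+  `mi_register_deferred_free` that drains an application-side queue of
+  objects to release; `_mi_deferred_free` below invokes it on the allocating
+  thread, passing the `force` flag and a heartbeat counter, and guards
+  against recursion via `tld->recurse`.)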
+----------------------------------------------------------- */ + +static mi_deferred_free_fun* volatile deferred_free = NULL; + +void _mi_deferred_free(mi_heap_t* heap, bool force) { + heap->tld->heartbeat++; + if (deferred_free != NULL && !heap->tld->recurse) { + heap->tld->recurse = true; + deferred_free(force, heap->tld->heartbeat); + heap->tld->recurse = false; + } +} + +void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { + deferred_free = fn; +} + + +/* ----------------------------------------------------------- + General allocation +----------------------------------------------------------- */ + +// A huge page is allocated directly without being in a queue. +// Because huge pages contain just one block, and the segment contains +// just that page, we always treat them as abandoned and any thread +// that frees the block can free the whole page and segment directly. +static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { + size_t block_size = _mi_os_good_alloc_size(size); + mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); + mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); + if (page != NULL) { + mi_assert_internal(mi_page_immediate_available(page)); + mi_assert_internal(page->block_size == block_size); + mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); + mi_assert_internal(_mi_page_segment(page)->used==1); + mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + + if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_increase(&heap->tld->stats.giant, block_size); + _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); + } + else { + _mi_stat_increase(&heap->tld->stats.huge, block_size); + _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); + } + } + return page; +} + + +// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. +void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept +{ + mi_assert_internal(heap != NULL); + + // initialize if necessary + if (mi_unlikely(!mi_heap_is_initialized(heap))) { + mi_thread_init(); // calls `_mi_heap_init` in turn + heap = mi_get_default_heap(); + } + mi_assert_internal(mi_heap_is_initialized(heap)); + + // call potential deferred free routines + _mi_deferred_free(heap, false); + + // free delayed frees from other threads + _mi_heap_delayed_free(heap); + + // huge allocation? + mi_page_t* page; + if (mi_unlikely(size > MI_LARGE_OBJ_SIZE_MAX)) { + if (mi_unlikely(size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) + page = NULL; + } + else { + page = mi_huge_page_alloc(heap,size); + } + } + else { + // otherwise find a page with free blocks in our size segregated queues + page = mi_find_free_page(heap,size); + } + if (page == NULL) return NULL; // out of memory + + mi_assert_internal(mi_page_immediate_available(page)); + mi_assert_internal(page->block_size >= size); + + // and try again, this time succeeding! (i.e. 
this should never recurse) + return _mi_page_malloc(heap, page, size); +} diff --git a/runtime/src/mimalloc/c/segment.c b/runtime/src/mimalloc/c/segment.c new file mode 100644 index 00000000000..dcc6a04b398 --- /dev/null +++ b/runtime/src/mimalloc/c/segment.c @@ -0,0 +1,743 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset +#include + +#define MI_PAGE_HUGE_ALIGN (256*1024) + +/* ----------------------------------------------------------- + Segment allocation + We allocate pages inside big OS allocated "segments" + (4mb on 64-bit). This is to avoid splitting VMA's on Linux + and reduce fragmentation on other OS's. Each thread + owns its own segments. + + Currently we have: + - small pages (64kb), 64 in one segment + - medium pages (512kb), 8 in one segment + - large pages (4mb), 1 in one segment + - huge blocks > MI_LARGE_OBJ_SIZE_MAX (512kb) are directly allocated by the OS + + In any case the memory for a segment is virtual and only + committed on demand (i.e. we are careful to not touch the memory + until we actually allocate a block there) + + If a thread ends, it "abandons" pages with used blocks + and there is an abandoned segment list whose segments can + be reclaimed by still running threads, much like work-stealing. +----------------------------------------------------------- */ + + +/* ----------------------------------------------------------- + Queue of segments containing free pages +----------------------------------------------------------- */ + + +#if (MI_DEBUG>1) +static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { + mi_assert_internal(segment != NULL); + mi_segment_t* list = queue->first; + while (list != NULL) { + if (list == segment) break; + mi_assert_internal(list->next==NULL || list->next->prev == list); + mi_assert_internal(list->prev==NULL || list->prev->next == list); + list = list->next; + } + return (list == segment); +} +#endif + +static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) { + return (queue->first == NULL); +} + +static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) { + mi_assert_expensive(mi_segment_queue_contains(queue, segment)); + if (segment->prev != NULL) segment->prev->next = segment->next; + if (segment->next != NULL) segment->next->prev = segment->prev; + if (segment == queue->first) queue->first = segment->next; + if (segment == queue->last) queue->last = segment->prev; + segment->next = NULL; + segment->prev = NULL; +} + +static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) { + mi_assert_expensive(!mi_segment_queue_contains(queue, segment)); + segment->next = NULL; + segment->prev = queue->last; + if (queue->last != NULL) { + mi_assert_internal(queue->last->next == NULL); + queue->last->next = segment; + queue->last = segment; + } + else { + queue->last = queue->first = segment; + } +} + +static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi_segments_tld_t* tld) { + if (kind == MI_PAGE_SMALL) return &tld->small_free; + else if (kind == 
MI_PAGE_MEDIUM) return &tld->medium_free; + else return NULL; +} + +static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { + return mi_segment_free_queue_of_kind(segment->page_kind, tld); +} + +// remove from free queue if it is in one +static void mi_segment_remove_from_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); // may be NULL + bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); + if (in_queue) { + mi_segment_queue_remove(queue, segment); + } +} + +static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_segment_enqueue(mi_segment_free_queue(segment, tld), segment); +} + + +/* ----------------------------------------------------------- + Invariant checking +----------------------------------------------------------- */ + +#if (MI_DEBUG > 1) +static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); + bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); + if (in_queue) { + mi_assert_expensive(mi_segment_queue_contains(queue, segment)); + } + return in_queue; +} + +static size_t mi_segment_pagesize(mi_segment_t* segment) { + return ((size_t)1 << segment->page_shift); +} +static bool mi_segment_is_valid(mi_segment_t* segment) { + mi_assert_internal(segment != NULL); + mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(segment->used <= segment->capacity); + mi_assert_internal(segment->abandoned <= segment->used); + size_t nfree = 0; + for (size_t i = 0; i < segment->capacity; i++) { + if (!segment->pages[i].segment_in_use) nfree++; + } + mi_assert_internal(nfree + segment->used == segment->capacity); + mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 + mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || + (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + return true; +} +#endif + +/* ----------------------------------------------------------- + Segment size calculations +----------------------------------------------------------- */ + +// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) +{ + size_t psize = (segment->page_kind == MI_PAGE_HUGE ? 
segment->segment_size : (size_t)1 << segment->page_shift); + uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; + + if (page->segment_idx == 0) { + // the first page starts after the segment info (and possible guard page) + p += segment->segment_info_size; + psize -= segment->segment_info_size; + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { + size_t adjust = block_size - ((uintptr_t)p % block_size); + if (adjust < block_size) { + p += adjust; + psize -= adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); + } + } + + if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) { + // secure == 1: the last page has an os guard page at the end + // secure > 1: every page has an os guard page + psize -= _mi_os_page_size(); + } + + if (page_size != NULL) *page_size = psize; + mi_assert_internal(_mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_segment(p) == segment); + return p; +} + +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { + /* + if (mi_option_is_enabled(mi_option_secure)) { + // always reserve maximally so the protection falls on + // the same address area, as we need to reuse them from the caches interchangably. + capacity = MI_SMALL_PAGES_PER_SEGMENT; + } + */ + const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; + size_t guardsize = 0; + size_t isize = 0; + + if (MI_SECURE == 0) { + // normally no guard pages + isize = _mi_align_up(minsize, 16 * MI_MAX_ALIGN_SIZE); + } + else { + // in secure mode, we set up a protected page in between the segment info + // and the page data (and one at the end of the segment) + const size_t page_size = _mi_os_page_size(); + isize = _mi_align_up(minsize, page_size); + guardsize = page_size; + required = _mi_align_up(required, page_size); + } +; + if (info_size != NULL) *info_size = isize; + if (pre_size != NULL) *pre_size = isize + guardsize; + return (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_PAGE_HUGE_ALIGN) ); +} + + +/* ---------------------------------------------------------------------------- +Segment caches +We keep a small segment cache per thread to increase local +reuse and avoid setting/clearing guard pages in secure mode. +------------------------------------------------------------------------------- */ + +static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { + if (segment_size>=0) _mi_stat_increase(&tld->stats->segments,1); + else _mi_stat_decrease(&tld->stats->segments,1); + tld->count += (segment_size >= 0 ? 
1 : -1); + if (tld->count > tld->peak_count) tld->peak_count = tld->count; + tld->current_size += segment_size; + if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size; +} + + +static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { + segment->thread_id = 0; + mi_segments_track_size(-((long)segment_size),tld); + if (MI_SECURE != 0) { + mi_assert_internal(!segment->mem_is_fixed); + _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + } + _mi_mem_free(segment, segment_size, segment->memid, tld->stats); +} + + +// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, +#define MI_SEGMENT_CACHE_FRACTION (8) + +// note: returned segment may be partially reset +static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t* tld) { + if (segment_size != 0 && segment_size != MI_SEGMENT_SIZE) return NULL; + mi_segment_t* segment = tld->cache; + if (segment == NULL) return NULL; + tld->cache_count--; + tld->cache = segment->next; + segment->next = NULL; + mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + _mi_stat_decrease(&tld->stats->segments_cache, 1); + return segment; +} + +static bool mi_segment_cache_full(mi_segments_tld_t* tld) +{ + if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread + size_t max_cache = mi_option_get(mi_option_segment_cache); + if (tld->cache_count < max_cache + && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache + ) { + return false; + } + // take the opportunity to reduce the segment cache if it is too large (now) + // TODO: this never happens as we check against peak usage, should we use current usage instead? + while (tld->cache_count > max_cache) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { + mi_segment_t* segment = mi_segment_cache_pop(0,tld); + mi_assert_internal(segment != NULL); + if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); + } + return true; +} + +static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); + mi_assert_internal(segment->next == NULL); + if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { + return false; + } + mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { + _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + } + segment->next = tld->cache; + tld->cache = segment; + tld->cache_count++; + _mi_stat_increase(&tld->stats->segments_cache,1); + return true; +} + +// called by threads that are terminating to free cached segments +void _mi_segment_thread_collect(mi_segments_tld_t* tld) { + mi_segment_t* segment; + while ((segment = mi_segment_cache_pop(0,tld)) != NULL) { + mi_segment_os_free(segment, segment->segment_size, tld); + } + mi_assert_internal(tld->cache_count == 0); + mi_assert_internal(tld->cache == NULL); +} + + +/* ----------------------------------------------------------- + Segment allocation +----------------------------------------------------------- */ + +// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
+static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + // calculate needed sizes first + size_t capacity; + if (page_kind == MI_PAGE_HUGE) { + mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); + capacity = 1; + } + else { + mi_assert_internal(required == 0); + size_t page_size = (size_t)1 << page_shift; + capacity = MI_SEGMENT_SIZE / page_size; + mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); + mi_assert_internal(capacity >= 1 && capacity <= MI_SMALL_PAGES_PER_SEGMENT); + } + size_t info_size; + size_t pre_size; + size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); + mi_assert_internal(segment_size >= required); + size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); + + // Try to get it from our thread local cache first + bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager || (page_kind > MI_PAGE_MEDIUM); + bool protection_still_good = false; + bool is_zero = false; + mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); + if (segment != NULL) { + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + if (segment->page_kind != page_kind) { + _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs + } + else { + protection_still_good = true; // otherwise, the guard pages are still in place + } + } + if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { + mi_assert_internal(!segment->mem_is_fixed); + _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); + segment->mem_is_committed = true; + } + if (!segment->mem_is_fixed && + (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { + bool reset_zero = false; + _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); + if (reset_zero) is_zero = true; + } + } + else { + // Allocate the segment from the OS + size_t memid; + bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); + if (segment == NULL) return NULL; // failed to allocate + if (!commit) { + // ensure the initial info is committed + bool commit_zero = false; + _mi_mem_commit(segment, info_size, &commit_zero, tld->stats); + if (commit_zero) is_zero = true; + } + segment->memid = memid; + segment->mem_is_fixed = mem_large; + segment->mem_is_committed = commit; + mi_segments_track_size((long)segment_size, tld); + } + mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); + + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t,next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); + + // guard pages + if ((MI_SECURE != 0) && !protection_still_good) { + // in secure mode, we set up a protected page in between the segment info + // and the page data + mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); + _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); + size_t os_page_size = _mi_os_page_size(); + if (MI_SECURE <= 1) { + // and protect the last page too + _mi_mem_protect( (uint8_t*)segment + segment_size - 
os_page_size, os_page_size ); + } + else { + // protect every page + for (size_t i = 0; i < capacity; i++) { + _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); + } + } + } + + // initialize + segment->page_kind = page_kind; + segment->capacity = capacity; + segment->page_shift = page_shift; + segment->segment_size = segment_size; + segment->segment_info_size = pre_size; + segment->thread_id = _mi_thread_id(); + segment->cookie = _mi_ptr_cookie(segment); + for (uint8_t i = 0; i < segment->capacity; i++) { + segment->pages[i].segment_idx = i; + segment->pages[i].is_reset = false; + segment->pages[i].is_committed = commit; + segment->pages[i].is_zero_init = is_zero; + } + _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); + return segment; +} + + +static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { + UNUSED(force); + //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); + mi_assert(segment != NULL); + mi_segment_remove_from_free_queue(segment,tld); + + mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); + mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); + mi_assert(segment->next == NULL); + mi_assert(segment->prev == NULL); + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + + // update reset memory statistics + /* + for (uint8_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->is_reset) { + page->is_reset = false; + mi_stat_decrease( tld->stats->reset,mi_page_size(page)); + } + } + */ + + if (!force && mi_segment_cache_push(segment, tld)) { + // it is put in our cache + } + else { + // otherwise return it to the OS + mi_segment_os_free(segment, segment->segment_size, tld); + } +} + +/* ----------------------------------------------------------- + Free page management inside a segment +----------------------------------------------------------- */ + + +static bool mi_segment_has_free(const mi_segment_t* segment) { + return (segment->used < segment->capacity); +} + +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { + mi_assert_internal(mi_segment_has_free(segment)); + mi_assert_expensive(mi_segment_is_valid(segment)); + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use) { + if (page->is_reset || !page->is_committed) { + size_t psize; + uint8_t* start = _mi_page_start(segment, page, &psize); + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + page->is_committed = true; + bool is_zero = false; + _mi_mem_commit(start,psize,&is_zero,stats); + if (is_zero) page->is_zero_init = true; + } + if (page->is_reset) { + mi_assert_internal(!segment->mem_is_fixed); + page->is_reset = false; + bool is_zero = false; + _mi_mem_unreset(start, psize, &is_zero, stats); + if (is_zero) page->is_zero_init = true; + } + } + return page; + } + } + mi_assert(false); + return NULL; +} + + +/* ----------------------------------------------------------- + Free +----------------------------------------------------------- */ + +static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); + +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { + UNUSED(stats); + mi_assert_internal(page->segment_in_use); + 
mi_assert_internal(mi_page_all_free(page)); + mi_assert_internal(page->is_committed); + size_t inuse = page->capacity * page->block_size; + _mi_stat_decrease(&stats->page_committed, inuse); + _mi_stat_decrease(&stats->pages, 1); + + // reset the page memory to reduce memory pressure? + if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + size_t psize; + uint8_t* start = _mi_page_start(segment, page, &psize); + page->is_reset = true; + _mi_mem_reset(start, psize, stats); + } + + // zero the page data, but not the segment fields + page->is_zero_init = false; + ptrdiff_t ofs = offsetof(mi_page_t,capacity); + memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + page->segment_in_use = false; + segment->used--; +} + +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) +{ + mi_assert(page != NULL); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_expensive(mi_segment_is_valid(segment)); + + // mark it as free now + mi_segment_page_clear(segment, page, tld->stats); + + if (segment->used == 0) { + // no more used pages; remove from the free list and free the segment + mi_segment_free(segment, force, tld); + } + else { + if (segment->used == segment->abandoned) { + // only abandoned pages; remove from free list and abandon + mi_segment_abandon(segment,tld); + } + else if (segment->used + 1 == segment->capacity) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // for now we only support small and medium pages + // move back to segments free list + mi_segment_insert_in_free_queue(segment,tld); + } + } +} + + +/* ----------------------------------------------------------- + Abandonment +----------------------------------------------------------- */ + +// When threads terminate, they can leave segments with +// live blocks (reached through other threads). 
Such segments +// are "abandoned" and will be reclaimed by other threads to +// reuse their pages and/or free them eventually +static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; +static volatile _Atomic(uintptr_t) abandoned_count; // = 0; + +static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(segment->used == segment->abandoned); + mi_assert_internal(segment->used > 0); + mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_expensive(mi_segment_is_valid(segment)); + + // remove the segment from the free page queue if needed + mi_segment_remove_from_free_queue(segment,tld); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + + // all pages in the segment are abandoned; add it to the abandoned list + _mi_stat_increase(&tld->stats->segments_abandoned, 1); + mi_segments_track_size(-((long)segment->segment_size), tld); + segment->thread_id = 0; + mi_segment_t* next; + do { + next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned)); + mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next)); + mi_atomic_increment(&abandoned_count); +} + +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert(page != NULL); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_expensive(mi_segment_is_valid(segment)); + segment->abandoned++; + _mi_stat_increase(&tld->stats->pages_abandoned, 1); + mi_assert_internal(segment->abandoned <= segment->used); + if (segment->used == segment->abandoned) { + // all pages are abandoned, abandon the entire segment + mi_segment_abandon(segment,tld); + } +} + +bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { + uintptr_t reclaimed = 0; + uintptr_t atmost; + if (try_all) { + atmost = abandoned_count+16; // close enough + } + else { + atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) + if (atmost < 8) atmost = 8; // but at least 8 + } + + // for `atmost` `reclaimed` abandoned segments... + while(atmost > reclaimed) { + // try to claim the head of the abandoned segments + mi_segment_t* segment; + do { + segment = (mi_segment_t*)abandoned; + } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment)); + if (segment==NULL) break; // stop early if no more segments available + + // got it. 
+ mi_atomic_decrement(&abandoned_count); + segment->thread_id = _mi_thread_id(); + segment->abandoned_next = NULL; + mi_segments_track_size((long)segment->segment_size,tld); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_expensive(mi_segment_is_valid(segment)); + _mi_stat_decrease(&tld->stats->segments_abandoned,1); + + // add its abandoned pages to the current thread + mi_assert(segment->abandoned == segment->used); + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + segment->abandoned--; + mi_assert(page->next == NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + if (mi_page_all_free(page)) { + // if everything free by now, free the page + mi_segment_page_clear(segment,page,tld->stats); + } + else { + // otherwise reclaim it + _mi_page_reclaim(heap,page); + } + } + } + mi_assert(segment->abandoned == 0); + if (segment->used == 0) { // due to page_clear + mi_segment_free(segment,false,tld); + } + else { + reclaimed++; + // add its free pages to the the current thread free small segment queue + if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { + mi_segment_insert_in_free_queue(segment,tld); + } + } + } + return (reclaimed>0); +} + + +/* ----------------------------------------------------------- + Small page allocation +----------------------------------------------------------- */ + +// Allocate a small page inside a segment. +// Requires that the page has free pages +static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(mi_segment_has_free(segment)); + mi_page_t* page = mi_segment_find_free(segment, tld->stats); + page->segment_in_use = true; + segment->used++; + mi_assert_internal(segment->used <= segment->capacity); + if (segment->used == segment->capacity) { + // if no more free pages, remove from the queue + mi_assert_internal(!mi_segment_has_free(segment)); + mi_segment_remove_from_free_queue(segment,tld); + } + return page; +} + +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld); + if (mi_segment_queue_is_empty(free_queue)) { + mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld); + if (segment == NULL) return NULL; + mi_segment_enqueue(free_queue, segment); + } + mi_assert_internal(free_queue->first != NULL); + return mi_segment_page_alloc_in(free_queue->first,tld); +} + +static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_page_alloc(MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); +} + +static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_page_alloc(MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); +} + +/* ----------------------------------------------------------- + large page allocation +----------------------------------------------------------- */ + +static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); + if (segment == NULL) return NULL; + segment->used = 1; + mi_page_t* page = &segment->pages[0]; + page->segment_in_use = true; + return page; +} + +static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + 
mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); + if (segment == NULL) return NULL; + mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); + segment->used = 1; + segment->thread_id = 0; // huge pages are immediately abandoned + mi_page_t* page = &segment->pages[0]; + page->segment_in_use = true; + return page; +} + +/* ----------------------------------------------------------- + Page allocation and free +----------------------------------------------------------- */ + +mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_page_t* page; + if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { + page = mi_segment_small_page_alloc(tld,os_tld); + } + else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { + page = mi_segment_medium_page_alloc(tld, os_tld); + } + else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { + page = mi_segment_large_page_alloc(tld, os_tld); + } + else { + page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + return page; +} diff --git a/runtime/src/mimalloc/c/static.c b/runtime/src/mimalloc/c/static.c new file mode 100644 index 00000000000..f8aa4c1e88d --- /dev/null +++ b/runtime/src/mimalloc/c/static.c @@ -0,0 +1,28 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#if !KONAN_MI_MALLOC +#define _DEFAULT_SOURCE + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +// For a static override we create a single object file +// containing the whole library. If it is linked first +// it will override all the standard library allocation +// functions (on Unix's). +#include "stats.c" +#include "os.c" +#include "memory.c" +#include "segment.c" +#include "page.c" +#include "heap.c" +#include "alloc.c" +#include "alloc-aligned.c" +#include "alloc-posix.c" +#include "init.c" +#include "options.c" +#endif \ No newline at end of file diff --git a/runtime/src/mimalloc/c/stats.c b/runtime/src/mimalloc/c/stats.c new file mode 100644 index 00000000000..50bd029db0a --- /dev/null +++ b/runtime/src/mimalloc/c/stats.c @@ -0,0 +1,463 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include <stdio.h>  // fputs, stderr
+#include <string.h> // memset
+
+
+/* -----------------------------------------------------------
+  Statistics operations
+----------------------------------------------------------- */
+
+static bool mi_is_in_main(void* stat) {
+  return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main
+         && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t)));
+}
+
+static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
+  if (amount == 0) return;
+  if (mi_is_in_main(stat))
+  {
+    // add atomically (for abandoned pages)
+    mi_atomic_add64(&stat->current,amount);
+    if (stat->current > stat->peak) stat->peak = stat->current; // racing.. it's ok
+    if (amount > 0) {
+      mi_atomic_add64(&stat->allocated,amount);
+    }
+    else {
+      mi_atomic_add64(&stat->freed, -amount);
+    }
+  }
+  else {
+    // add thread local
+    stat->current += amount;
+    if (stat->current > stat->peak) stat->peak = stat->current;
+    if (amount > 0) {
+      stat->allocated += amount;
+    }
+    else {
+      stat->freed += -amount;
+    }
+  }
+}
+
+void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
+  if (mi_is_in_main(stat)) {
+    mi_atomic_add64( &stat->count, 1 );
+    mi_atomic_add64( &stat->total, (int64_t)amount );
+  }
+  else {
+    stat->count++;
+    stat->total += amount;
+  }
+}
+
+void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
+  mi_stat_update(stat, (int64_t)amount);
+}
+
+void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
+  mi_stat_update(stat, -((int64_t)amount));
+}
+
+// must be thread safe as it is called from stats_merge
+static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) {
+  if (stat==src) return;
+  mi_atomic_add64( &stat->allocated, src->allocated * unit);
+  mi_atomic_add64( &stat->current, src->current * unit);
+  mi_atomic_add64( &stat->freed, src->freed * unit);
+  // peak scores do not work across threads..
+ mi_atomic_add64( &stat->peak, src->peak * unit); +} + +static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { + if (stat==src) return; + mi_atomic_add64( &stat->total, src->total * unit); + mi_atomic_add64( &stat->count, src->count * unit); +} + +// must be thread safe as it is called from stats_merge +static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { + if (stats==src) return; + mi_stat_add(&stats->segments, &src->segments,1); + mi_stat_add(&stats->pages, &src->pages,1); + mi_stat_add(&stats->reserved, &src->reserved, 1); + mi_stat_add(&stats->committed, &src->committed, 1); + mi_stat_add(&stats->reset, &src->reset, 1); + mi_stat_add(&stats->page_committed, &src->page_committed, 1); + + mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); + mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1); + mi_stat_add(&stats->threads, &src->threads, 1); + + mi_stat_add(&stats->malloc, &src->malloc, 1); + mi_stat_add(&stats->segments_cache, &src->segments_cache, 1); + mi_stat_add(&stats->huge, &src->huge, 1); + mi_stat_add(&stats->giant, &src->giant, 1); + + mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); + mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); + mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); + + mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); + mi_stat_counter_add(&stats->searches, &src->searches, 1); + mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); + mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1); +#if MI_STAT>1 + for (size_t i = 0; i <= MI_BIN_HUGE; i++) { + if (src->normal[i].allocated > 0 || src->normal[i].freed > 0) { + mi_stat_add(&stats->normal[i], &src->normal[i], 1); + } + } +#endif +} + +/* ----------------------------------------------------------- + Display statistics +----------------------------------------------------------- */ + +// unit > 0 : size in binary bytes +// unit == 0: count as decimal +// unit < 0 : count in binary +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { + char buf[32]; + int len = 32; + const char* suffix = (unit <= 0 ? " " : "b"); + double base = (unit == 0 ? 1000.0 : 1024.0); + if (unit>0) n *= unit; + + double pos = (double)(n < 0 ? -n : n); + if (pos < base) + snprintf(buf,len, "%d %s ", (int)n, suffix); + else if (pos < base*base) + snprintf(buf, len, "%.1f k%s", (double)n / base, suffix); + else if (pos < base*base*base) + snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix); + else + snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix); + + _mi_fprintf(out, (fmt==NULL ? 
"%11s" : fmt), buf); +} + + +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out) { + mi_printf_amount(n,unit,out,NULL); +} + +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out) { + if (unit==1) _mi_fprintf(out,"%11s"," "); + else mi_print_amount(n,0,out); +} + +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out ) { + _mi_fprintf(out,"%10s:", msg); + if (unit>0) { + mi_print_amount(stat->peak, unit, out); + mi_print_amount(stat->allocated, unit, out); + mi_print_amount(stat->freed, unit, out); + mi_print_amount(unit, 1, out); + mi_print_count(stat->allocated, unit, out); + if (stat->allocated > stat->freed) + _mi_fprintf(out, " not all freed!\n"); + else + _mi_fprintf(out, " ok\n"); + } + else if (unit<0) { + mi_print_amount(stat->peak, -1, out); + mi_print_amount(stat->allocated, -1, out); + mi_print_amount(stat->freed, -1, out); + if (unit==-1) { + _mi_fprintf(out, "%22s", ""); + } + else { + mi_print_amount(-unit, 1, out); + mi_print_count((stat->allocated / -unit), 0, out); + } + if (stat->allocated > stat->freed) + _mi_fprintf(out, " not all freed!\n"); + else + _mi_fprintf(out, " ok\n"); + } + else { + mi_print_amount(stat->peak, 1, out); + mi_print_amount(stat->allocated, 1, out); + _mi_fprintf(out, "\n"); + } +} + +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out ) { + _mi_fprintf(out, "%10s:", msg); + mi_print_amount(stat->total, -1, out); + _mi_fprintf(out, "\n"); +} + +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { + double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); + _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); +} + + +static void mi_print_header(mi_output_fun* out ) { + _mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); +} + +#if MI_STAT>1 +static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out) { + bool found = false; + char buf[64]; + for (size_t i = 0; i <= max; i++) { + if (bins[i].allocated > 0) { + found = true; + int64_t unit = _mi_bin_size((uint8_t)i); + snprintf(buf, 64, "%s %3zu", fmt, i); + mi_stat_add(all, &bins[i], unit); + mi_stat_print(&bins[i], buf, unit, out); + } + } + //snprintf(buf, 64, "%s all", fmt); + //mi_stat_print(all, buf, 1); + if (found) { + _mi_fprintf(out, "\n"); + mi_print_header(out); + } +} +#endif + + +static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); + +static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept { + mi_print_header(out); + #if MI_STAT>1 + mi_stat_count_t normal = { 0,0,0,0 }; + mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out); + mi_stat_print(&normal, "normal", 1, out); + mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out); + mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 
1 : -(stats->giant.allocated / stats->giant_count.count)), out); + mi_stat_count_t total = { 0,0,0,0 }; + mi_stat_add(&total, &normal, 1); + mi_stat_add(&total, &stats->huge, 1); + mi_stat_add(&total, &stats->giant, 1); + mi_stat_print(&total, "total", 1, out); + _mi_fprintf(out, "malloc requested: "); + mi_print_amount(stats->malloc.allocated, 1, out); + _mi_fprintf(out, "\n\n"); + #endif + mi_stat_print(&stats->reserved, "reserved", 1, out); + mi_stat_print(&stats->committed, "committed", 1, out); + mi_stat_print(&stats->reset, "reset", 1, out); + mi_stat_print(&stats->page_committed, "touched", 1, out); + mi_stat_print(&stats->segments, "segments", -1, out); + mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out); + mi_stat_print(&stats->segments_cache, "-cached", -1, out); + mi_stat_print(&stats->pages, "pages", -1, out); + mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out); + mi_stat_counter_print(&stats->pages_extended, "-extended", out); + mi_stat_counter_print(&stats->page_no_retire, "-noretire", out); + mi_stat_counter_print(&stats->mmap_calls, "mmaps", out); + mi_stat_counter_print(&stats->commit_calls, "commits", out); + mi_stat_print(&stats->threads, "threads", -1, out); + mi_stat_counter_print_avg(&stats->searches, "searches", out); + + if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); + + double user_time; + double sys_time; + size_t peak_rss; + size_t page_faults; + size_t page_reclaim; + size_t peak_commit; + mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); + _mi_fprintf(out,"%10s: user: %.3f s, system: %.3f s, faults: %lu, reclaims: %lu, rss: ", "process", user_time, sys_time, (unsigned long)page_faults, (unsigned long)page_reclaim ); + mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); + if (peak_commit > 0) { + _mi_fprintf(out,", commit charge: "); + mi_printf_amount((int64_t)peak_commit, 1, out, "%s"); + } + _mi_fprintf(out,"\n"); +} + +double _mi_clock_end(double start); +double _mi_clock_start(void); +static double mi_time_start = 0.0; + +static mi_stats_t* mi_stats_get_default(void) { + mi_heap_t* heap = mi_heap_get_default(); + return &heap->tld->stats; +} + +static void mi_stats_merge_from(mi_stats_t* stats) { + if (stats != &_mi_stats_main) { + mi_stats_add(&_mi_stats_main, stats); + memset(stats, 0, sizeof(mi_stats_t)); + } +} + +void mi_stats_reset(void) mi_attr_noexcept { + mi_stats_t* stats = mi_stats_get_default(); + if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } + memset(&_mi_stats_main, 0, sizeof(mi_stats_t)); + mi_time_start = _mi_clock_start(); +} + +void mi_stats_merge(void) mi_attr_noexcept { + mi_stats_merge_from( mi_stats_get_default() ); +} + +void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` + mi_stats_merge_from(stats); +} + + +static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) { + mi_stats_merge_from(stats); + _mi_stats_print(&_mi_stats_main, secs, out); +} + +void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { + mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out); +} + +void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { + _mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out); +} + + + +// -------------------------------------------------------- +// Basic timer for convenience +// -------------------------------------------------------- + +#ifdef _WIN32 +#include +static double mi_to_seconds(LARGE_INTEGER t) { + static 
double freq = 0.0; + if (freq <= 0.0) { + LARGE_INTEGER f; + QueryPerformanceFrequency(&f); + freq = (double)(f.QuadPart); + } + return ((double)(t.QuadPart) / freq); +} + +static double mi_clock_now(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return mi_to_seconds(t); +} +#else +#include +#ifdef CLOCK_REALTIME +static double mi_clock_now(void) { + struct timespec t; + clock_gettime(CLOCK_REALTIME, &t); + return (double)t.tv_sec + (1.0e-9 * (double)t.tv_nsec); +} +#else +// low resolution timer +static double mi_clock_now(void) { + return ((double)clock() / (double)CLOCKS_PER_SEC); +} +#endif +#endif + + +static double mi_clock_diff = 0.0; + +double _mi_clock_start(void) { + if (mi_clock_diff == 0.0) { + double t0 = mi_clock_now(); + mi_clock_diff = mi_clock_now() - t0; + } + return mi_clock_now(); +} + +double _mi_clock_end(double start) { + double end = mi_clock_now(); + return (end - start - mi_clock_diff); +} + + +// -------------------------------------------------------- +// Basic process statistics +// -------------------------------------------------------- + +#if defined(_WIN32) +#include +#include +#pragma comment(lib,"psapi.lib") + +static double filetime_secs(const FILETIME* ftime) { + ULARGE_INTEGER i; + i.LowPart = ftime->dwLowDateTime; + i.HighPart = ftime->dwHighDateTime; + double secs = (double)(i.QuadPart) * 1.0e-7; // FILETIME is in 100 nano seconds + return secs; +} +static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { + FILETIME ct; + FILETIME ut; + FILETIME st; + FILETIME et; + GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); + *utime = filetime_secs(&ut); + *stime = filetime_secs(&st); + + PROCESS_MEMORY_COUNTERS info; + GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + *peak_rss = (size_t)info.PeakWorkingSetSize; + *page_faults = (size_t)info.PageFaultCount; + *peak_commit = (size_t)info.PeakPagefileUsage; + *page_reclaim = 0; +} + +#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) +#include +#include +#include + +#if defined(__APPLE__) && defined(__MACH__) +#include +#endif + +static double timeval_secs(const struct timeval* tv) { + return (double)tv->tv_sec + ((double)tv->tv_usec * 1.0e-6); +} + +static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); +#if defined(__APPLE__) && defined(__MACH__) + *peak_rss = rusage.ru_maxrss; +#else + *peak_rss = rusage.ru_maxrss * 1024; +#endif + *page_faults = rusage.ru_majflt; + *page_reclaim = rusage.ru_minflt; + *peak_commit = 0; + *utime = timeval_secs(&rusage.ru_utime); + *stime = timeval_secs(&rusage.ru_stime); +} + +#else +#ifndef __wasi__ +// WebAssembly instances are not processes +#pragma message("define a way to get process info") +#endif + +static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { + *peak_rss = 0; + *page_faults = 0; + *page_reclaim = 0; + *peak_commit = 0; + *utime = 0.0; + *stime = 0.0; +} +#endif diff --git a/runtime/src/opt_alloc/cpp/AllocImpl.cpp b/runtime/src/opt_alloc/cpp/AllocImpl.cpp new file mode 100644 index 00000000000..1bdb93f4d66 --- /dev/null +++ b/runtime/src/opt_alloc/cpp/AllocImpl.cpp @@ -0,0 +1,17 @@ +/* + * Copyright 2010-2019 JetBrains s.r.o. 
Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ +#include +#include + +extern "C" { +void* mi_calloc(size_t, size_t); +void mi_free(void*); +void* konan_calloc_impl(size_t n_elements, size_t elem_size) { + return mi_calloc(n_elements, elem_size); +} +void konan_free_impl (void* mem) { + mi_free(mem); +} +} // extern "C" diff --git a/runtime/src/std_alloc/cpp/AllocImpl.cpp b/runtime/src/std_alloc/cpp/AllocImpl.cpp new file mode 100644 index 00000000000..ce85a81ecbb --- /dev/null +++ b/runtime/src/std_alloc/cpp/AllocImpl.cpp @@ -0,0 +1,17 @@ +/* + * Copyright 2010-2019 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license + * that can be found in the LICENSE file. + */ +#include +#include + +extern "C" { +// Memory operations. +void* konan_calloc_impl(size_t n_elements, size_t elem_size) { + return calloc(n_elements, elem_size); +} +void konan_free_impl (void* mem) { + free(mem); +} +} + diff --git a/shared/src/main/kotlin/org/jetbrains/kotlin/konan/target/KonanTargetExtenstions.kt b/shared/src/main/kotlin/org/jetbrains/kotlin/konan/target/KonanTargetExtenstions.kt index f736d61e3d8..4ca9734e46e 100644 --- a/shared/src/main/kotlin/org/jetbrains/kotlin/konan/target/KonanTargetExtenstions.kt +++ b/shared/src/main/kotlin/org/jetbrains/kotlin/konan/target/KonanTargetExtenstions.kt @@ -4,4 +4,20 @@ fun KonanTarget.supportsCodeCoverage(): Boolean = this == KonanTarget.MINGW_X64 || this == KonanTarget.LINUX_X64 || this == KonanTarget.MACOS_X64 || - this == KonanTarget.IOS_X64 \ No newline at end of file + this == KonanTarget.IOS_X64 + +fun KonanTarget.supportsMimallocAllocator(): Boolean = + when(this) { + is KonanTarget.LINUX_X64 -> true + is KonanTarget.MINGW_X86 -> true + is KonanTarget.MINGW_X64 -> true + is KonanTarget.MACOS_X64 -> true + is KonanTarget.LINUX_ARM64 -> true + is KonanTarget.LINUX_ARM32_HFP -> true + is KonanTarget.ANDROID_X64 -> true + is KonanTarget.ANDROID_ARM64 -> true + is KonanTarget.IOS_ARM32 -> true + is KonanTarget.IOS_ARM64 -> true + is KonanTarget.IOS_X64 -> true + else -> false // watchOS/tvOS/android_x86/android_arm32 aren't tested; linux_mips32/linux_mipsel32 need linking with libatomic. + } \ No newline at end of file
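The new KonanTarget.supportsMimallocAllocator() extension gives other components one place to check whether a target has a working mimalloc port before wiring in the mimalloc runtime parts. A minimal Kotlin sketch of such a caller follows; pickAllocator is purely illustrative and not part of this change, the "std"/"mimalloc" strings are assumed allocator names, and HostManager.host is assumed to resolve the compiling host's KonanTarget:

import org.jetbrains.kotlin.konan.target.HostManager
import org.jetbrains.kotlin.konan.target.KonanTarget
import org.jetbrains.kotlin.konan.target.supportsMimallocAllocator

// Illustrative helper (assumption, not in this patch): fall back to the
// standard allocator whenever the target has no tested mimalloc port.
fun pickAllocator(target: KonanTarget = HostManager.host): String =
    if (target.supportsMimallocAllocator()) "mimalloc" else "std"

fun main() {
    // Prints e.g. "Allocator for linux_x64: mimalloc" on a supported host.
    println("Allocator for ${HostManager.host}: ${pickAllocator()}")
}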