i#1680 large pages: Replace VMM_BLOCK_SIZE with an option.
VMM_BLOCK_SIZE is replaced with INTERNAL_OPTION(vmm_block_size).
This will make it possible to have it depend on the page size.

Review-URL: https://codereview.appspot.com/311760043
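
For context, the pattern applied throughout core/heap.c below is replacing a compile-time constant with a value looked up at run time. A minimal standalone sketch of the idea, with simplified names (vmm_block_size() is a hypothetical stand-in for the real INTERNAL_OPTION(vmm_block_size) accessor, not DynamoRIO code):

    #include <stddef.h>

    /* Before this commit: the block size is a compile-time constant. */
    enum { VMM_BLOCK_SIZE_OLD = 16 * 1024 };

    static size_t
    blocks_needed_old(size_t bytes)
    {
        return (bytes + VMM_BLOCK_SIZE_OLD - 1) / VMM_BLOCK_SIZE_OLD;
    }

    /* After: the block size is read from an option at run time, so a later
     * change can derive it from the page size.  vmm_block_size() here is a
     * hypothetical stand-in for INTERNAL_OPTION(vmm_block_size). */
    static size_t
    vmm_block_size(void)
    {
        return 16 * 1024; /* e.g. the parsed -vmm_block_size value */
    }

    static size_t
    blocks_needed_new(size_t bytes)
    {
        return (bytes + vmm_block_size() - 1) / vmm_block_size();
    }

The run-time lookup is what allows the default to be computed from the page size later instead of being fixed at build time.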
egrimley-arm committed Oct 10, 2016
1 parent a73afa6 commit 4fd7afe
Showing 4 changed files with 69 additions and 55 deletions.
101 changes: 46 additions & 55 deletions core/heap.c
@@ -575,30 +575,15 @@ typedef enum {
static void report_low_on_memory(oom_source_t source,
heap_error_code_t os_error_code);

/* virtual memory manager */
enum {VMM_BLOCK_SIZE = IF_WINDOWS_ELSE(64,16)*1024}; /* 64KB or 16KB */
/* Our current allocation unit matches the allocation granularity on
* windows, to avoid worrying about external fragmentation
* Since most of our allocations fall within this range this makes the
* common operation be finding a single empty block.
*
* On Linux we save a lot of wasted alignment space by using a smaller
* granularity (PR 415959).
*
* FIXME: for Windows, if we reserve the whole region up front and
* just commit pieces, why do we need to match the Windows kernel
* alloc granularity?
*/

enum {
/* maximum 512MB virtual memory units */
MAX_VMM_HEAP_UNIT_SIZE = 512*1024*1024,
/* We should normally have only one large unit, so this is in fact
* the maximum we should count on in one process
*/
/* minimum will be used only if an invalid option is set */
MIN_VMM_HEAP_UNIT_SIZE = VMM_BLOCK_SIZE
};
/* minimum will be used only if an invalid option is set */
#define MIN_VMM_HEAP_UNIT_SIZE INTERNAL_OPTION(vmm_block_size)

typedef struct {
vm_addr_t start_addr; /* base virtual address */
@@ -621,7 +606,7 @@ typedef struct {
exact size - however this field is left last in the structure
in case we do want to save some memory
*/
bitmap_element_t blocks[BITMAP_INDEX(MAX_VMM_HEAP_UNIT_SIZE/VMM_BLOCK_SIZE)];
bitmap_element_t blocks[BITMAP_INDEX(MAX_VMM_HEAP_UNIT_SIZE / MIN_VMM_BLOCK_SIZE)];
} vm_heap_t;

/* We keep our heap management structs on the heap for selfprot (case 8074).
@@ -655,16 +640,17 @@ static inline
uint
vmm_addr_to_block(vm_heap_t *vmh, vm_addr_t p)
{
ASSERT(CHECK_TRUNCATE_TYPE_uint((p - vmh->start_addr) / VMM_BLOCK_SIZE));
return (uint) ((p - vmh->start_addr) / VMM_BLOCK_SIZE);
ASSERT(CHECK_TRUNCATE_TYPE_uint((p - vmh->start_addr) /
INTERNAL_OPTION(vmm_block_size)));
return (uint) ((p - vmh->start_addr) / INTERNAL_OPTION(vmm_block_size));
}

static inline
vm_addr_t
vmm_block_to_addr(vm_heap_t *vmh, uint block)
{
ASSERT(block >=0 && block < vmh->num_blocks);
return (vm_addr_t)(vmh->start_addr + block*VMM_BLOCK_SIZE);
return (vm_addr_t)(vmh->start_addr + block * INTERNAL_OPTION(vmm_block_size));
}

static bool
@@ -694,9 +680,9 @@ vmm_dump_map(vm_heap_t *vmh)

LOG(GLOBAL, LOG_HEAP, 1, "\nvmm_dump_map("PFX") virtual regions\n", vmh);
#define VMM_DUMP_MAP_LOG(i, last_i) \
LOG(GLOBAL, LOG_HEAP, 1, PFX"-"PFX" size=%d %s\n", vmm_block_to_addr(vmh, last_i), \
vmm_block_to_addr(vmh, i-1) + VMM_BLOCK_SIZE - 1, \
(i-last_i)*VMM_BLOCK_SIZE, \
LOG(GLOBAL, LOG_HEAP, 1, PFX"-"PFX" size=%d %s\n", vmm_block_to_addr(vmh, last_i), \
vmm_block_to_addr(vmh, i-1) + INTERNAL_OPTION(vmm_block_size) - 1, \
(i-last_i)*INTERNAL_OPTION(vmm_block_size), \
is_used ? "reserved" : "free");

for (i=0; i < bitmap_size; i++) {
@@ -737,7 +723,7 @@ vmm_heap_unit_init(vm_heap_t *vmh, size_t size)
heap_error_code_t error_code;
ASSIGN_INIT_LOCK_FREE(vmh->lock, vmh_lock);

size = ALIGN_FORWARD(size, VMM_BLOCK_SIZE);
size = ALIGN_FORWARD(size, INTERNAL_OPTION(vmm_block_size));
ASSERT(size <= MAX_VMM_HEAP_UNIT_SIZE);
vmh->alloc_size = size;

@@ -754,9 +740,10 @@ vmm_heap_unit_init(vm_heap_t *vmh, size_t size)
* non-deterministic. */
/* Make sure we don't waste the lower bits from our random number */
preferred = (DYNAMO_OPTION(vm_base)
+ get_random_offset(DYNAMO_OPTION(vm_max_offset)/VMM_BLOCK_SIZE)
*VMM_BLOCK_SIZE);
preferred = ALIGN_FORWARD(preferred, VMM_BLOCK_SIZE);
+ get_random_offset(DYNAMO_OPTION(vm_max_offset) /
INTERNAL_OPTION(vmm_block_size)) *
INTERNAL_OPTION(vmm_block_size));
preferred = ALIGN_FORWARD(preferred, INTERNAL_OPTION(vmm_block_size));
/* overflow check: w/ vm_base shouldn't happen so debug-only check */
ASSERT(!POINTER_OVERFLOW_ON_ADD(preferred, size));

@@ -781,19 +768,21 @@ vmm_heap_unit_init(vm_heap_t *vmh, size_t size)
* syslog or assert here
*/
/* need extra size to ensure alignment */
vmh->alloc_size = size + VMM_BLOCK_SIZE;
vmh->alloc_size = size + INTERNAL_OPTION(vmm_block_size);
#ifdef X64
/* PR 215395, make sure allocation satisfies heap reachability contraints */
vmh->alloc_start = os_heap_reserve_in_region
((void *)ALIGN_FORWARD(heap_allowable_region_start, PAGE_SIZE),
(void *)ALIGN_BACKWARD(heap_allowable_region_end, PAGE_SIZE),
size + VMM_BLOCK_SIZE, &error_code,
size + INTERNAL_OPTION(vmm_block_size), &error_code,
true/*+x*/);
#else
vmh->alloc_start = (heap_pc)
os_heap_reserve(NULL, size + VMM_BLOCK_SIZE, &error_code, true/*+x*/);
os_heap_reserve(NULL, size + INTERNAL_OPTION(vmm_block_size),
&error_code, true/*+x*/);
#endif
vmh->start_addr = (heap_pc) ALIGN_FORWARD(vmh->alloc_start, VMM_BLOCK_SIZE);
vmh->start_addr = (heap_pc) ALIGN_FORWARD(vmh->alloc_start,
INTERNAL_OPTION(vmm_block_size));
LOG(GLOBAL, LOG_HEAP, 1, "vmm_heap_unit_init unable to allocate at preferred="
PFX" letting OS place sz=%dM addr="PFX" \n",
preferred, size/(1024*1024), vmh->start_addr);
@@ -828,14 +817,14 @@ vmm_heap_unit_init(vm_heap_t *vmh, size_t size)
ASSERT_NOT_REACHED();
}
vmh->end_addr = vmh->start_addr + size;
ASSERT_TRUNCATE(vmh->num_blocks, uint, size / VMM_BLOCK_SIZE);
vmh->num_blocks = (uint) (size / VMM_BLOCK_SIZE);
ASSERT_TRUNCATE(vmh->num_blocks, uint, size / INTERNAL_OPTION(vmm_block_size));
vmh->num_blocks = (uint) (size / INTERNAL_OPTION(vmm_block_size));
vmh->num_free_blocks = vmh->num_blocks;
LOG(GLOBAL, LOG_HEAP, 2, "vmm_heap_unit_init ["PFX","PFX") total=%d free=%d\n",
vmh->start_addr, vmh->end_addr, vmh->num_blocks, vmh->num_free_blocks);

/* make sure static bitmap_t size is properly aligned on block boundaries */
ASSERT(ALIGNED(MAX_VMM_HEAP_UNIT_SIZE, VMM_BLOCK_SIZE));
ASSERT(ALIGNED(MAX_VMM_HEAP_UNIT_SIZE, INTERNAL_OPTION(vmm_block_size)));
bitmap_initialize_free(vmh->blocks, vmh->num_blocks);
DOLOG(1, LOG_HEAP, {
vmm_dump_map(vmh);
@@ -858,7 +847,7 @@ vmm_heap_unit_exit(vm_heap_t *vmh)
DOLOG(1, LOG_HEAP, { vmm_dump_map(vmh); });
ASSERT(bitmap_check_consistency(vmh->blocks,
vmh->num_blocks, vmh->num_free_blocks));
ASSERT(vmh->num_blocks * VMM_BLOCK_SIZE ==
ASSERT(vmh->num_blocks * INTERNAL_OPTION(vmm_block_size) ==
(ptr_uint_t)(vmh->end_addr - vmh->start_addr));

/* In case there are no tombstones we can just free the unit and
@@ -885,14 +874,14 @@ static
bool
vmm_is_reserved_unit(vm_heap_t *vmh, vm_addr_t p, size_t size)
{
size = ALIGN_FORWARD(size, VMM_BLOCK_SIZE);
size = ALIGN_FORWARD(size, INTERNAL_OPTION(vmm_block_size));
if (p < vmh->start_addr || vmh->end_addr < p/*overflow*/ ||
vmh->end_addr < (p + size))
return false;
ASSERT(CHECK_TRUNCATE_TYPE_uint(size/VMM_BLOCK_SIZE));
ASSERT(CHECK_TRUNCATE_TYPE_uint(size/INTERNAL_OPTION(vmm_block_size)));
ASSERT(bitmap_are_reserved_blocks(vmh->blocks, vmh->num_blocks,
vmm_addr_to_block(vmh, p),
(uint)size/VMM_BLOCK_SIZE));
(uint)size/INTERNAL_OPTION(vmm_block_size)));
return true;
}

@@ -970,7 +959,7 @@ rel32_reachable_from_vmcode(byte *tgt)
#endif
}

/* Reservations here are done with VMM_BLOCK_SIZE alignment
/* Reservations here are done with INTERNAL_OPTION(vmm_block_size) alignment
* (e.g. 64KB) but the caller is not forced to request at that
* alignment. We explicitly synchronize reservations and decommits
* within the vm_heap_t.
@@ -986,9 +975,9 @@ vmm_heap_reserve_blocks(vm_heap_t *vmh, size_t size_in)
uint first_block;
size_t size;

size = ALIGN_FORWARD(size_in, VMM_BLOCK_SIZE);
ASSERT_TRUNCATE(request, uint, size/VMM_BLOCK_SIZE);
request = (uint) size/VMM_BLOCK_SIZE;
size = ALIGN_FORWARD(size_in, INTERNAL_OPTION(vmm_block_size));
ASSERT_TRUNCATE(request, uint, size/INTERNAL_OPTION(vmm_block_size));
request = (uint) size/INTERNAL_OPTION(vmm_block_size);

LOG(GLOBAL, LOG_HEAP, 2,
"vmm_heap_reserve_blocks: size=%d => %d in blocks=%d free_blocks~=%d\n",
@@ -1035,9 +1024,9 @@ vmm_heap_free_blocks(vm_heap_t *vmh, vm_addr_t p, size_t size_in)
uint request;
size_t size;

size = ALIGN_FORWARD(size_in, VMM_BLOCK_SIZE);
ASSERT_TRUNCATE(request, uint, size/VMM_BLOCK_SIZE);
request = (uint) size/VMM_BLOCK_SIZE;
size = ALIGN_FORWARD(size_in, INTERNAL_OPTION(vmm_block_size));
ASSERT_TRUNCATE(request, uint, size/INTERNAL_OPTION(vmm_block_size));
request = (uint) size/INTERNAL_OPTION(vmm_block_size);

LOG(GLOBAL, LOG_HEAP, 2, "vmm_heap_free_blocks: size=%d blocks=%d p="PFX"\n",
size, request, p);
@@ -1068,7 +1057,7 @@ at_reset_at_vmm_limit()
100 * heapmgt->vmheap.num_free_blocks <
DYNAMO_OPTION(reset_at_vmm_percent_free_limit) * heapmgt->vmheap.num_blocks) ||
(DYNAMO_OPTION(reset_at_vmm_free_limit) != 0 &&
heapmgt->vmheap.num_free_blocks * VMM_BLOCK_SIZE <
heapmgt->vmheap.num_free_blocks * INTERNAL_OPTION(vmm_block_size) <
DYNAMO_OPTION(reset_at_vmm_free_limit));
}

@@ -1383,7 +1372,7 @@ vmm_heap_init_constraints()
void
vmm_heap_init()
{
IF_WINDOWS(ASSERT(VMM_BLOCK_SIZE == OS_ALLOC_GRANULARITY));
IF_WINDOWS(ASSERT(INTERNAL_OPTION(vmm_block_size) == OS_ALLOC_GRANULARITY));
if (DYNAMO_OPTION(vm_reserve)) {
vmm_heap_unit_init(&heapmgt->vmheap, DYNAMO_OPTION(vm_size));
}
@@ -1401,8 +1390,8 @@ vmm_heap_exit()
uint perstack =
ALIGN_FORWARD_UINT(dynamo_options.stack_size +
(dynamo_options.guard_pages ? (2*PAGE_SIZE) : 0),
VMM_BLOCK_SIZE) /
VMM_BLOCK_SIZE;
INTERNAL_OPTION(vmm_block_size)) /
INTERNAL_OPTION(vmm_block_size);
uint unfreed_blocks = perstack * 1 /* initstack */ +
/* current stack */
perstack * ((doing_detach IF_APP_EXPORTS(|| dr_api_exit)) ? 0 : 1);
@@ -2135,7 +2124,7 @@ heap_mmap_ex(size_t reserve_size, size_t commit_size, uint prot, bool guarded)
ASSERT(!DYNAMO_OPTION(vm_reserve) ||
!DYNAMO_OPTION(stack_shares_gencode) ||
(ptr_uint_t)p - (guarded ? (GUARD_PAGE_ADJUSTMENT/2) : 0) ==
ALIGN_BACKWARD(p, VMM_BLOCK_SIZE) ||
ALIGN_BACKWARD(p, INTERNAL_OPTION(vmm_block_size)) ||
at_reset_at_vmm_limit());
LOG(GLOBAL, LOG_HEAP, 2, "heap_mmap: %d bytes [/ %d] @ "PFX"\n",
commit_size, reserve_size, p);
@@ -2197,7 +2186,8 @@ heap_mmap_reserve_post_stack(dcontext_t *dcontext,
ASSERT(reserve_size > 0 && commit_size < reserve_size);
/* 1.5 * guard page adjustment since we'll share the middle one */
if (DYNAMO_OPTION(stack_size) + reserve_size +
GUARD_PAGE_ADJUSTMENT + GUARD_PAGE_ADJUSTMENT / 2 > VMM_BLOCK_SIZE) {
GUARD_PAGE_ADJUSTMENT +
GUARD_PAGE_ADJUSTMENT / 2 > INTERNAL_OPTION(vmm_block_size)) {
/* there's not enough room to share the allocation block, stack is too big */
LOG(GLOBAL, LOG_HEAP, 1, "Not enough room to allocate 0x%08x bytes post stack "
"of size 0x%08x\n", reserve_size, DYNAMO_OPTION(stack_size));
@@ -2296,7 +2286,7 @@ heap_mmap_reserve_post_stack(dcontext_t *dcontext,
dynamo_vm_areas_unlock();
/* We rely on this for freeing in absence of dcontext */
ASSERT((ptr_uint_t)p - GUARD_PAGE_ADJUSTMENT/2 !=
ALIGN_BACKWARD(p, VMM_BLOCK_SIZE));
ALIGN_BACKWARD(p, INTERNAL_OPTION(vmm_block_size)));
#ifdef DEBUG_MEMORY
memset(p, HEAP_ALLOCATED_BYTE, commit_size);
#endif
@@ -2322,14 +2312,15 @@ heap_munmap_post_stack(dcontext_t *dcontext, void *p, size_t reserve_size)
DYNAMO_OPTION(vm_reserve) && DYNAMO_OPTION(stack_shares_gencode)) {
bool at_stack_end = (p == dcontext->dstack + GUARD_PAGE_ADJUSTMENT/2);
bool at_block_start = ((ptr_uint_t)p - GUARD_PAGE_ADJUSTMENT/2 ==
ALIGN_BACKWARD(p, VMM_BLOCK_SIZE));
ALIGN_BACKWARD(p, INTERNAL_OPTION(vmm_block_size)));
ASSERT((at_stack_end && !at_block_start) ||
(!at_stack_end && at_block_start));
}
});
if (!DYNAMO_OPTION(vm_reserve) ||
!DYNAMO_OPTION(stack_shares_gencode) ||
(ptr_uint_t)p - GUARD_PAGE_ADJUSTMENT/2 == ALIGN_BACKWARD(p, VMM_BLOCK_SIZE)) {
(ptr_uint_t)p - GUARD_PAGE_ADJUSTMENT/2 ==
ALIGN_BACKWARD(p, INTERNAL_OPTION(vmm_block_size))) {
heap_munmap(p, reserve_size);
} else {
/* Detach makes it a pain to pass in the commit size so
2 changes: 2 additions & 0 deletions core/heap.h
@@ -254,6 +254,8 @@ void global_unprotected_heap_free(void *p, size_t size HEAPACCT(which_heap_t whi
#define NONPERSISTENT_HEAP_TYPE_FREE(dc, p, type, which) \
NONPERSISTENT_HEAP_ARRAY_FREE(dc, p, type, 1, which)

#define MIN_VMM_BLOCK_SIZE (16 * 1024)

/* special heap of same-sized blocks that avoids global locks */
void *special_heap_init(uint block_size, bool use_lock, bool executable,
bool persistent);
6 changes: 6 additions & 0 deletions core/options.c
@@ -866,6 +866,12 @@ check_option_compatibility_helper(int recurse_count)
{
bool changed_options = false;
#ifdef EXPOSE_INTERNAL_OPTIONS
if (INTERNAL_OPTION(vmm_block_size) < MIN_VMM_BLOCK_SIZE) {
USAGE_ERROR("vmm_block_size (%d) must be >= %d, setting to min",
INTERNAL_OPTION(vmm_block_size), MIN_VMM_BLOCK_SIZE);
dynamo_options.vmm_block_size = MIN_VMM_BLOCK_SIZE;
changed_options = true;
}
if (!INTERNAL_OPTION(inline_calls) && !DYNAMO_OPTION(disable_traces)) {
/* cannot disable inlining of calls and build traces (currently) */
USAGE_ERROR("-no_inline_calls not compatible with -disable_traces, setting to default");
15 changes: 15 additions & 0 deletions core/optionsx.h
@@ -1101,6 +1101,21 @@

OPTION_INTERNAL(bool, simulate_contention, "simulate lock contention for testing purposes only")

/* Virtual memory manager.
* Our current default allocation unit matches the allocation granularity on
* windows, to avoid worrying about external fragmentation
* Since most of our allocations fall within this range this makes the
* common operation be finding a single empty block.
*
* On Linux we save a lot of wasted alignment space by using a smaller
* granularity (PR 415959).
*
* FIXME: for Windows, if we reserve the whole region up front and
* just commit pieces, why do we need to match the Windows kernel
* alloc granularity?
*/
OPTION_DEFAULT_INTERNAL(uint_size, vmm_block_size, (IF_WINDOWS_ELSE(64,16)*1024),
"allocation unit for virtual memory manager")
OPTION_DEFAULT_INTERNAL(uint_size, initial_heap_unit_size, 32*1024, "initial private heap unit size")
OPTION_DEFAULT_INTERNAL(uint_size, initial_global_heap_unit_size, 32*1024, "initial global heap unit size")
/* if this is too small then once past the vm reservation we have too many
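
The commit message notes that making the block size an option is what enables a page-size-dependent value. A purely illustrative sketch of such a follow-up, under the assumption that the block size would be rounded up to a multiple of the page size (page_size() and effective_vmm_block_size() are hypothetical names, not part of this commit):

    #include <stddef.h>

    /* Hypothetical follow-up (an assumption, not part of this commit): round
     * the configured block size up to a multiple of the page size so that
     * large pages can back the VMM reservation.  page_size() stands in for
     * however the runtime page size would be queried. */
    static size_t
    page_size(void)
    {
        return 4096; /* could be 2 MiB when large pages are in use */
    }

    static size_t
    effective_vmm_block_size(size_t configured)
    {
        size_t ps = page_size();
        return (configured + ps - 1) / ps * ps; /* round up to a page multiple */
    }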
