diff --git a/rts/Lua/LuaMemPool.cpp b/rts/Lua/LuaMemPool.cpp
index b252c1acdba..1e18e34da25 100644
--- a/rts/Lua/LuaMemPool.cpp
+++ b/rts/Lua/LuaMemPool.cpp
@@ -22,8 +22,9 @@
 // global, affects all pool instances
 bool LuaMemPool::enabled = false;
 
-static LuaMemPool gSharedPool(-1);
+static LuaMemPool* gSharedPool = nullptr;
+static std::array<uint8_t, sizeof(LuaMemPool)> gSharedPoolMem;
 
 static std::vector<LuaMemPool*> gPools;
 static std::vector<size_t> gIndcs;
 static std::atomic<size_t> gCount = {0};
@@ -38,7 +39,7 @@ static bool AllocExternal(size_t size) { return (!LuaMemPool::enabled || !AllocI
 
 size_t LuaMemPool::GetPoolCount() { return (gCount.load()); }
 
-LuaMemPool* LuaMemPool::GetSharedPtr() { return &gSharedPool; }
+LuaMemPool* LuaMemPool::GetSharedPtr() { return gSharedPool; }
 
 LuaMemPool* LuaMemPool::AcquirePtr(bool shared, bool owned)
 {
 	LuaMemPool* p = GetSharedPtr();
@@ -86,8 +87,8 @@ void LuaMemPool::ReleasePtr(LuaMemPool* p, const CLuaHandle* o)
 	gMutex.unlock();
 }
 
-void LuaMemPool::FreeShared() { gSharedPool.Clear(); }
-void LuaMemPool::InitStatic(bool enable) { LuaMemPool::enabled = enable; }
+void LuaMemPool::FreeShared() { gSharedPool->Clear(); }
+void LuaMemPool::InitStatic(bool enable) { gSharedPool = new (gSharedPoolMem.data()) LuaMemPool(LuaMemPool::enabled = enable); }
 
 void LuaMemPool::KillStatic()
 {
 	for (LuaMemPool*& p: gPools) {
@@ -96,15 +97,19 @@ void LuaMemPool::KillStatic()
 
 	gPools.clear();
 	gIndcs.clear();
+
+	spring::SafeDestruct(gSharedPool);
 }
 
 
+LuaMemPool::LuaMemPool(bool isEnabled): LuaMemPool(size_t(-1)) { assert(isEnabled == LuaMemPool::enabled); }
 LuaMemPool::LuaMemPool(size_t lmpIndex): globalIndex(lmpIndex)
 {
 	if (!LuaMemPool::enabled)
 		return;
 
+	poolImpl.Init();
 	Reserve(16384);
 }
 
@@ -112,24 +117,34 @@ LuaMemPool::LuaMemPool(size_t lmpIndex): globalIndex(lmpIndex)
 
 void LuaMemPool::LogStats(const char* handle, const char* lctype) const
 {
 	LOG(
-		"[LuaMemPool::%s][handle=%s (%s)] index=%lu {blocks,sizes}={%lu,%lu} {int,ext,rec}Allocs={%lu,%lu,%lu} {chunk,block}Bytes={%lu,%lu}",
+		#if (LMP_USE_CHUNK_TABLE == 1)
+		"[LuaMemPool::%s][handle=%s (%s)] index=%zu {blocks,sizes}={%zu,%zu} {int,ext,rec}Allocs={%zu,%zu,%zu} {chunk,block}Bytes={%zu,%zu}",
+		#else
+		"[LuaMemPool::%s][handle=%s (%s)] index=%zu {numAllocs[*],allocSums[*]}={%u,%u} {int,ext,rec}Allocs={%zu,%zu,%zu} {chunk,block}Bytes={%zu,%zu}",
+		#endif
 		__func__,
 		handle,
 		lctype,
-		(unsigned long) globalIndex,
-		(unsigned long) allocBlocks.size(),
-		(unsigned long) chunkCountTable.size(),
-		(unsigned long) allocStats[STAT_NIA],
-		(unsigned long) allocStats[STAT_NEA],
-		(unsigned long) allocStats[STAT_NRA],
-		(unsigned long) allocStats[STAT_NCB],
-		(unsigned long) allocStats[STAT_NBB]
+		globalIndex,
+		#if (LMP_USE_CHUNK_TABLE == 1)
+		allocBlocks.size(),
+		chunkCountTable.size(),
+		#else
+		poolImpl.numAllocs[PoolImpl::NUM_POOLS],
+		poolImpl.allocSums[PoolImpl::NUM_POOLS],
+		#endif
+		allocStats[STAT_NIA],
+		allocStats[STAT_NEA],
+		allocStats[STAT_NRA],
+		allocStats[STAT_NCB],
+		allocStats[STAT_NBB]
 	);
 }
 
 void LuaMemPool::DeleteBlocks()
 {
+	#if (LMP_USE_CHUNK_TABLE == 1)
 	#if 1
 	for (void* p: allocBlocks) {
 		::operator delete(p);
@@ -137,6 +152,7 @@ void LuaMemPool::DeleteBlocks()
 
 	allocBlocks.clear();
 	#endif
+	#endif
 }
 
 
 void* LuaMemPool::Alloc(size_t size)
@@ -149,6 +165,7 @@ void* LuaMemPool::Alloc(size_t size)
 	allocStats[STAT_NIA] += 1;
 	allocStats[STAT_NCB] += (size = std::max(size, size_t(MIN_ALLOC_SIZE)));
 
+	#if (LMP_USE_CHUNK_TABLE == 1)
 	auto freeChunksTablePair = std::make_pair(freeChunksTable.find(size), false);
 
 	if (freeChunksTablePair.first == freeChunksTable.end())
@@ -191,6 +208,9 @@ void* LuaMemPool::Alloc(size_t size)
 	allocStats[STAT_NBB] += numBytes;
 	return newBlock;
+	#else
+	return (poolImpl.Alloc(size));
+	#endif
 }
 
 
 void* LuaMemPool::Realloc(void* ptr, size_t nsize, size_t osize)
@@ -219,7 +239,175 @@ void LuaMemPool::Free(void* ptr, size_t size)
 
 	allocStats[STAT_NCB] -= (size = std::max(size, size_t(MIN_ALLOC_SIZE)));
 
+	#if (LMP_USE_CHUNK_TABLE == 1)
 	*(void**) ptr = freeChunksTable[size];
 	freeChunksTable[size] = ptr;
+	#else
+	poolImpl.Free(ptr, size);
+	#endif
+}
+
+
+
+
+void LuaMemPool::PoolImpl::Init() {
+	poolPtrs.fill(nullptr);
+	numAllocs.fill(0);
+	allocSums.fill(0);
+
+	poolPtrs[ 0] = NewPool< 0>();
+	poolPtrs[ 1] = NewPool< 1>();
+	poolPtrs[ 2] = NewPool< 2>();
+	poolPtrs[ 3] = NewPool< 3>();
+	poolPtrs[ 4] = NewPool< 4>();
+	poolPtrs[ 5] = NewPool< 5>();
+	poolPtrs[ 6] = NewPool< 6>();
+	poolPtrs[ 7] = NewPool< 7>();
+	poolPtrs[ 8] = NewPool< 8>();
+	poolPtrs[ 9] = NewPool< 9>();
+	poolPtrs[10] = NewPool<10>();
+	poolPtrs[11] = NewPool<11>();
+	poolPtrs[12] = NewPool<12>();
+	poolPtrs[13] = NewPool<13>();
+	poolPtrs[14] = NewPool<14>();
+	poolPtrs[15] = NewPool<15>();
+	poolPtrs[16] = NewPool<16>();
+	poolPtrs[17] = NewPool<17>();
+	poolPtrs[18] = NewPool<18>();
+	poolPtrs[19] = NewPool<19>();
+	poolPtrs[20] = NewPool<20>();
+	poolPtrs[21] = NewPool<21>();
+	poolPtrs[22] = NewPool<22>();
+	poolPtrs[23] = NewPool<23>();
+	poolPtrs[24] = NewPool<24>();
+	poolPtrs[25] = NewPool<25>();
+	poolPtrs[26] = NewPool<26>();
+}
+
+void LuaMemPool::PoolImpl::Kill() {
+	KillPool< 0>();
+	KillPool< 1>();
+	KillPool< 2>();
+	KillPool< 3>();
+	KillPool< 4>();
+	KillPool< 5>();
+	KillPool< 6>();
+	KillPool< 7>();
+	KillPool< 8>();
+	KillPool< 9>();
+	KillPool<10>();
+	KillPool<11>();
+	KillPool<12>();
+	KillPool<13>();
+	KillPool<14>();
+	KillPool<15>();
+	KillPool<16>();
+	KillPool<17>();
+	KillPool<18>();
+	KillPool<19>();
+	KillPool<20>();
+	KillPool<21>();
+	KillPool<22>();
+	KillPool<23>();
+	KillPool<24>();
+	KillPool<25>();
+	KillPool<26>();
+
+	poolPtrs.fill(nullptr);
+}
+
+
+void* LuaMemPool::PoolImpl::Alloc(uint32_t size) {
+	const uint32_t subPoolIndex = CalcPoolIndex(size);
+
+	numAllocs[subPoolIndex] += 1;
+	allocSums[subPoolIndex] += size;
+	numAllocs[   NUM_POOLS] += 1;
+	allocSums[   NUM_POOLS] += size;
+
+	switch (subPoolIndex) {
+		case  0: { return (GetPool< 0>()->allocMem(size)); } break;
+		case  1: { return (GetPool< 1>()->allocMem(size)); } break;
+		case  2: { return (GetPool< 2>()->allocMem(size)); } break;
+		case  3: { return (GetPool< 3>()->allocMem(size)); } break;
+		case  4: { return (GetPool< 4>()->allocMem(size)); } break;
+		case  5: { return (GetPool< 5>()->allocMem(size)); } break;
+		case  6: { return (GetPool< 6>()->allocMem(size)); } break;
+		case  7: { return (GetPool< 7>()->allocMem(size)); } break;
+		case  8: { return (GetPool< 8>()->allocMem(size)); } break;
+		case  9: { return (GetPool< 9>()->allocMem(size)); } break;
+		case 10: { return (GetPool<10>()->allocMem(size)); } break;
+		case 11: { return (GetPool<11>()->allocMem(size)); } break;
+		case 12: { return (GetPool<12>()->allocMem(size)); } break;
+		case 13: { return (GetPool<13>()->allocMem(size)); } break;
+		case 14: { return (GetPool<14>()->allocMem(size)); } break;
+		case 15: { return (GetPool<15>()->allocMem(size)); } break;
+		case 16: { return (GetPool<16>()->allocMem(size)); } break;
+		case 17: { return (GetPool<17>()->allocMem(size)); } break;
+		case 18: { return (GetPool<18>()->allocMem(size)); } break;
+		case 19: { return (GetPool<19>()->allocMem(size)); } break;
+		case 20: { return (GetPool<20>()->allocMem(size)); } break;
+		case 21: { return (GetPool<21>()->allocMem(size)); } break;
+		case 22: { return (GetPool<22>()->allocMem(size)); } break;
+		case 23: { return (GetPool<23>()->allocMem(size)); } break;
+		case 24: { return (GetPool<24>()->allocMem(size)); } break;
+		case 25: { return (GetPool<25>()->allocMem(size)); } break;
+		case 26: { return (GetPool<26>()->allocMem(size)); } break;
+		case 27: { } break;
+		case 28: { } break;
+		case 29: { } break;
+		case 30: { } break;
+		case 31: { } break;
+		default: { } break;
+	}
+
+	// allocation too large, handle externally
+	return nullptr;
+}
+
+void LuaMemPool::PoolImpl::Free(void* ptr, uint32_t size) {
+	const uint32_t subPoolIndex = CalcPoolIndex(size);
+
+	numAllocs[subPoolIndex] -= 1;
+	allocSums[subPoolIndex] -= size;
+	numAllocs[   NUM_POOLS] -= 1;
+	allocSums[   NUM_POOLS] -= size;
+
+	assert(ptr != nullptr);
+
+	switch (subPoolIndex) {
+		case  0: { return (GetPool< 0>()->freeMem(ptr)); } break;
+		case  1: { return (GetPool< 1>()->freeMem(ptr)); } break;
+		case  2: { return (GetPool< 2>()->freeMem(ptr)); } break;
+		case  3: { return (GetPool< 3>()->freeMem(ptr)); } break;
+		case  4: { return (GetPool< 4>()->freeMem(ptr)); } break;
+		case  5: { return (GetPool< 5>()->freeMem(ptr)); } break;
+		case  6: { return (GetPool< 6>()->freeMem(ptr)); } break;
+		case  7: { return (GetPool< 7>()->freeMem(ptr)); } break;
+		case  8: { return (GetPool< 8>()->freeMem(ptr)); } break;
+		case  9: { return (GetPool< 9>()->freeMem(ptr)); } break;
+		case 10: { return (GetPool<10>()->freeMem(ptr)); } break;
+		case 11: { return (GetPool<11>()->freeMem(ptr)); } break;
+		case 12: { return (GetPool<12>()->freeMem(ptr)); } break;
+		case 13: { return (GetPool<13>()->freeMem(ptr)); } break;
+		case 14: { return (GetPool<14>()->freeMem(ptr)); } break;
+		case 15: { return (GetPool<15>()->freeMem(ptr)); } break;
+		case 16: { return (GetPool<16>()->freeMem(ptr)); } break;
+		case 17: { return (GetPool<17>()->freeMem(ptr)); } break;
+		case 18: { return (GetPool<18>()->freeMem(ptr)); } break;
+		case 19: { return (GetPool<19>()->freeMem(ptr)); } break;
+		case 20: { return (GetPool<20>()->freeMem(ptr)); } break;
+		case 21: { return (GetPool<21>()->freeMem(ptr)); } break;
+		case 22: { return (GetPool<22>()->freeMem(ptr)); } break;
+		case 23: { return (GetPool<23>()->freeMem(ptr)); } break;
+		case 24: { return (GetPool<24>()->freeMem(ptr)); } break;
+		case 25: { return (GetPool<25>()->freeMem(ptr)); } break;
+		case 26: { return (GetPool<26>()->freeMem(ptr)); } break;
+		case 27: { } break;
+		case 28: { } break;
+		case 29: { } break;
+		case 30: { } break;
+		case 31: { } break;
+	}
 }
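
A note on the shared-pool change above: the shared pool is no longer a namespace-scope object constructed during static initialization. InitStatic() now placement-constructs it inside a statically allocated byte buffer once the enable flag is known, and KillStatic() tears it down explicitly via spring::SafeDestruct. A minimal sketch of that lifecycle, assuming a hypothetical Pool type in place of LuaMemPool and a plain destructor call in place of SafeDestruct (taken here to also reset the pointer); the alignas is this sketch's own addition:

    #include <array>
    #include <cassert>
    #include <cstdint>
    #include <new>

    // hypothetical stand-in for LuaMemPool; construction is deferred until
    // the enable flag is actually known, instead of running before main()
    struct Pool {
        explicit Pool(bool enable): enabled(enable) {}
        bool enabled;
    };

    static Pool* gPool = nullptr;
    // raw storage with static lifetime; no constructor runs at startup
    alignas(Pool) static std::array<uint8_t, sizeof(Pool)> gPoolMem;

    static void InitStatic(bool enable) { gPool = new (gPoolMem.data()) Pool(enable); }
    static void KillStatic() { gPool->~Pool(); gPool = nullptr; }

    int main() {
        InitStatic(true);
        assert(gPool != nullptr && gPool->enabled);
        KillStatic();
        return 0;
    }
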
diff --git a/rts/Lua/LuaMemPool.h b/rts/Lua/LuaMemPool.h
index c0f3a613259..2f946c02308 100644
--- a/rts/Lua/LuaMemPool.h
+++ b/rts/Lua/LuaMemPool.h
@@ -6,13 +6,18 @@
 #include <cstddef>
 #include <vector>
+#include "Sim/Misc/SimObjectMemPool.h"
+#include "System/bitops.h"
 
 #include "System/UnorderedMap.hpp"
 
+#define LMP_USE_CHUNK_TABLE 0
+
 class CLuaHandle;
 
 class LuaMemPool {
 public:
-	LuaMemPool(size_t lmpIndex);
-	~LuaMemPool() { Clear(); }
+	explicit LuaMemPool(bool isEnabled);
+	explicit LuaMemPool(size_t lmpIndex);
+	~LuaMemPool() { Clear(); poolImpl.Kill(); }
 
 	LuaMemPool(const LuaMemPool& p) = delete;
 	LuaMemPool(LuaMemPool&& p) = delete;
@@ -39,12 +44,14 @@ class LuaMemPool {
 	}
 
 	void Reserve(size_t size) {
+		#if (LMP_USE_CHUNK_TABLE == 1)
 		freeChunksTable.reserve(size);
 		chunkCountTable.reserve(size);
 
 		#if 1
 		allocBlocks.reserve(size / 16);
 		#endif
+		#endif
 	}
 
 	void DeleteBlocks();
@@ -62,8 +69,10 @@ class LuaMemPool {
 	}
 
 	void ClearTables() {
+		#if (LMP_USE_CHUNK_TABLE == 1)
 		freeChunksTable.clear();
 		chunkCountTable.clear();
+		#endif
 	}
 
 	size_t GetGlobalIndex() const { return globalIndex; }
@@ -72,15 +81,79 @@ class LuaMemPool {
 
 public:
 	static constexpr size_t MIN_ALLOC_SIZE = sizeof(void*);
-	static constexpr size_t MAX_ALLOC_SIZE = (1024 * 1024) - 1;
+	static constexpr size_t MAX_ALLOC_SIZE = 1 << 26;
+	// static constexpr size_t MAX_ALLOC_SIZE = (1024 * 1024) - 1;
 
 	static bool enabled;
 
 private:
+	#if (LMP_USE_CHUNK_TABLE == 1)
 	spring::unsynced_map<size_t, void*> freeChunksTable;
 	spring::unsynced_map<size_t, size_t> chunkCountTable;
 
 	std::vector<void*> allocBlocks;
+	#endif
+
+
+	#if (LMP_USE_CHUNK_TABLE == 0)
+	struct PoolImpl {
+	public:
+		static constexpr uint32_t NUM_POOLS = 32;
+
+		// all N's intentionally over-dimensioned by a factor 1<<10
+		static constexpr std::array<uint32_t, NUM_POOLS> NUM_CHUNKS = {
+			1 << 12, 1 << 12, 1 << 12, 1 << 12, 1 << 12, 1 << 12, 1 << 12, 1 << 12,
+			1 << 13, 1 << 13, 1 << 13, 1 << 13, 1 << 13, 1 << 13, 1 << 13, 1 << 13,
+			1 << 14, 1 << 14, 1 << 14, 1 << 14, 1 << 14, 1 << 14, 1 << 14, 1 << 14,
+			1 << 15, 1 << 15, 1 << 15, 1 << 15, 1 << 15, 1 << 15, 1 << 15, 1 << 15,
+		};
+		static constexpr std::array<uint32_t, NUM_POOLS> NUM_PAGES = {
+			1 << 15, 1 << 15, 1 << 14, 1 << 14, 1 << 13, 1 << 13, 1 << 12, 1 << 12,
+			1 << 11, 1 << 11, 1 << 10, 1 << 10, 1 << 9, 1 << 9, 1 << 8, 1 << 8,
+			1 << 7, 1 << 7, 1 << 6, 1 << 6, 1 << 5, 1 << 5, 1 << 4, 1 << 4,
+			1 << 3, 1 << 3, 1 << 2, 1 << 2, 1 << 1, 1 << 1, 1 << 0, 1 << 0,
+		};
+
+		std::array<uint8_t[sizeof(FixedDynMemPool<MAX_ALLOC_SIZE, NUM_CHUNKS[NUM_POOLS - 1], NUM_PAGES[NUM_POOLS - 1]>)], NUM_POOLS> memPools;
+		std::array<void*, NUM_POOLS> poolPtrs;
+
+		std::array<uint32_t, NUM_POOLS + 1> numAllocs;
+		std::array<uint32_t, NUM_POOLS + 1> allocSums;
+
+	public:
+		static uint32_t CalcPoolIndex(uint32_t alloc) {
+			// skip first few pools due to page-overhead
+			return (std::max(2u + (MIN_ALLOC_SIZE == 8), log_base_2(alloc)));
+		}
+
+
+		template<uint32_t i, typename PoolType = FixedDynMemPool<(1 << i), NUM_CHUNKS[i], NUM_PAGES[i]>>
+		PoolType* NewPool() {
+			static_assert(sizeof(memPools[i]) >= sizeof(PoolType), "");
+			return (new (memPools[i]) PoolType());
+		}
+
+		template<uint32_t i, typename PoolType = FixedDynMemPool<(1 << i), NUM_CHUNKS[i], NUM_PAGES[i]>>
+		PoolType* GetPool() {
+			return (static_cast<PoolType*>(poolPtrs[i]));
+		}
+
+		template<uint32_t i, typename PoolType = FixedDynMemPool<(1 << i), NUM_CHUNKS[i], NUM_PAGES[i]>>
+		void KillPool() {
+			GetPool<i>()->~PoolType();
+		}
+
+
+		void Init();
+		void Kill();
+
+		void* Alloc(uint32_t size);
+		void Free(void* ptr, uint32_t size);
+	};
+
+	PoolImpl poolImpl;
+	#endif
+
 	enum {
 		STAT_NIA = 0, // number of internal allocs
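
With LMP_USE_CHUNK_TABLE set to 0, the free-chunk tables above are compiled out and every internal allocation is routed to one of 32 size-class sub-pools. CalcPoolIndex() buckets a request by the ceiling of its base-2 logarithm (never returning less than 3 on a 64-bit build, per the MIN_ALLOC_SIZE term), so sub-pool i receives sizes in (2^(i-1), 2^i]; anything past MAX_ALLOC_SIZE (1 << 26) hits the empty switch cases in PoolImpl::Alloc and is handled externally. A small self-check of that mapping, using a portable loop in place of the float-based log_base_2() from bitops.h; the CeilLog2 helper and the hard-coded 3u are this sketch's own:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // portable stand-in for bitops.h's log_base_2(): ceil(log2(v)) for v >= 2
    static uint32_t CeilLog2(uint32_t v) {
        uint32_t r = 0;
        while ((1u << r) < v)
            r += 1;
        return r;
    }

    // same bucketing as LuaMemPool::PoolImpl::CalcPoolIndex on a 64-bit build,
    // where MIN_ALLOC_SIZE == sizeof(void*) == 8 and sub-pools 0-2 are skipped
    static uint32_t CalcPoolIndex(uint32_t alloc) { return std::max<uint32_t>(3u, CeilLog2(alloc)); }

    int main() {
        // LuaMemPool::Alloc clamps requests to MIN_ALLOC_SIZE first, so the
        // smallest size reaching the sub-pools is 8; each size in (2^(i-1), 2^i]
        // maps to sub-pool i
        assert(CalcPoolIndex(       8) ==  3);
        assert(CalcPoolIndex(       9) ==  4);
        assert(CalcPoolIndex(      64) ==  6);
        assert(CalcPoolIndex(      65) ==  7);
        assert(CalcPoolIndex(1u << 20) == 20);
        assert(CalcPoolIndex(1u << 26) == 26); // MAX_ALLOC_SIZE, the largest sub-pool
        std::printf("size-class mapping ok\n");
        return 0;
    }

(Above 2^24 the float conversion inside log_base_2() starts rounding, so a size just past a power of two can land one class low; the checks here stay inside the exact range.)
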
diff --git a/rts/Sim/Misc/SimObjectMemPool.h b/rts/Sim/Misc/SimObjectMemPool.h
index 36957986d6b..b406d10a59f 100644
--- a/rts/Sim/Misc/SimObjectMemPool.h
+++ b/rts/Sim/Misc/SimObjectMemPool.h
@@ -103,6 +103,10 @@ template<size_t S> struct DynMemPool {
 
 
 // fixed-size dynamic version
+// page size per chunk, number of chunks, number of pages per chunk
+// at most N * K simultaneous allocations can be made from a pool
+// of size NxK, each of which consumes S bytes (N chunks with every
+// chunk consuming S * K bytes) excluding overhead
 template<size_t S, size_t N, size_t K> struct FixedDynMemPool {
 public:
 	template<typename T, typename... A> T* alloc(A&&... a) {
@@ -122,6 +126,8 @@ template<size_t S, size_t N, size_t K> struct FixedDynMemPool {
 		chunks[num_chunks].reset(new t_chunk_mem());
 
 		// reserve new indices; in reverse order since each will be popped from the back
+		indcs.reserve(K);
+
 		for (size_t j = 0; j < K; j++) {
 			indcs.push_back((num_chunks + 1) * K - j - 1);
 		}
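
The indcs.reserve(K) added above only pre-sizes the free-index vector for the refill loop that follows it: when chunk num_chunks is brought up, its K page indices are pushed in descending order so that popping from the back hands them out ascending. A standalone check of that index arithmetic, using a plain std::vector that mirrors the pool's names (K = 4 is arbitrary; this is not the pool itself):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
        constexpr size_t K = 4;        // pages per chunk
        std::vector<size_t> indcs;     // free page indices, popped from the back

        for (size_t num_chunks = 0; num_chunks < 2; num_chunks++) {
            // same refill as FixedDynMemPool: runs only when indcs is empty
            indcs.reserve(K);

            for (size_t j = 0; j < K; j++) {
                indcs.push_back((num_chunks + 1) * K - j - 1);
            }

            // chunk 0 yields page indices 0..3, chunk 1 yields 4..7, ascending
            for (size_t j = 0; j < K; j++) {
                assert(indcs.back() == num_chunks * K + j);
                indcs.pop_back();
            }
        }

        return 0;
    }

With N chunks of K pages each, the pool tops out at N * K live allocations of at most S bytes apiece, as the comment block above states.
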
diff --git a/rts/System/bitops.h b/rts/System/bitops.h
index 448cf054bba..3e71337f751 100644
--- a/rts/System/bitops.h
+++ b/rts/System/bitops.h
@@ -37,6 +37,21 @@ static inline unsigned int next_power_of_2(unsigned int x)
 #endif
 }
 
+static inline unsigned int log_base_2(unsigned int value) {
+	constexpr unsigned int mantissaShift = 23;
+	constexpr unsigned int exponentBias = 126;
+	union U {
+		unsigned int x;
+		float y;
+	} u = {0};
+
+	// value is constrained to [0,32]
+	u.y = static_cast<float>(value - 1);
+
+	// extract exponent bit
+	return (((u.x & (0xFF << mantissaShift)) >> mantissaShift) - exponentBias);
+}
+
 /**
  * @brief Count bits set
  * @param w Number in which to count bits
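
The new log_base_2() is what CalcPoolIndex() leans on: it converts value - 1 to float and reads back the IEEE-754 exponent field, which comes out to ceil(log2(value)) for value >= 2 as long as value - 1 converts exactly. A sketch of the same computation, rewritten with memcpy instead of the union purely to keep the example self-contained, and checked against a loop-based reference (the LogBase2 name and the test range are this sketch's own):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // same trick as the bitops.h addition: round (value - 1) to float and
    // read back the biased exponent field
    static unsigned int LogBase2(unsigned int value) {
        constexpr unsigned int mantissaShift = 23;
        constexpr unsigned int exponentBias = 126;

        const float f = static_cast<float>(value - 1);
        unsigned int bits = 0;
        std::memcpy(&bits, &f, sizeof(bits));

        return (((bits & (0xFFu << mantissaShift)) >> mantissaShift) - exponentBias);
    }

    int main() {
        // for value in (2^(k-1), 2^k] the result is k, i.e. ceil(log2(value));
        // exact at least while (value - 1) fits in the 24-bit float mantissa
        unsigned int k = 1;

        for (unsigned int value = 2; value <= (1u << 20); value++) {
            k += (value > (1u << k)); // reference: smallest k with 2^k >= value
            assert(LogBase2(value) == k);
        }

        return 0;
    }

LuaMemPool::Alloc clamps requests to MIN_ALLOC_SIZE before they reach the sub-pools, so the value - 1 == 0 corner case never arises there.
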