Skip to content

Commit

Permalink
Improve reporting memory usage
Browse files Browse the repository at this point in the history
Disabled Serializer.mo and wrote a dedicated memory usage counter. It
also tries to account for GC overhead (allocating 24 bytes actually
results in 32 bytes being allocated).

Added -d=execstatGCcollect, which adds a GC.gcollect() and an extra
notification for each execstat event so we can track how much memory is
in use for each phase. This has a roughly 3x performance penalty as full
GC is expensive. -d=reportSerializedSize is even slower though, and
together they cause a 10x hit to performance.

Belonging to [master]:
  - OpenModelica/OMCompiler#2218
  • Loading branch information
sjoelund authored and OpenModelica-Hudson committed Feb 19, 2018
1 parent 9c759d9 commit 526b822
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 53 deletions.
52 changes: 26 additions & 26 deletions Compiler/SimCode/SimCodeMain.mo
Expand Up @@ -94,7 +94,6 @@ import HpcOmTaskGraph;
import SerializeModelInfo;
import TaskSystemDump;
import SerializeInitXML;
import Serializer;
import SimCodeDump;
import SimCodeUtil;
import StackOverflow;
Expand Down Expand Up @@ -276,7 +275,7 @@ algorithm
ExecStat.execStat("SimCode");

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(simCode, filenamePrefix, "simCode");
serializeNotify(simCode, "SimCode");
ExecStat.execStat("Serialize simCode");
end if;

Expand Down Expand Up @@ -741,6 +740,7 @@ public function translateModel "
output String outFileDir;
output list<tuple<String, Values.Value>> resultValues;
protected
FCore.Cache inCache = cache;
Boolean generateFunctions = false;
Real timeSimCode=0.0, timeTemplates=0.0, timeBackend=0.0, timeFrontend=0.0;
type State = enumeration(frontend, backend, templates, simcode);
Expand Down Expand Up @@ -777,10 +777,13 @@ algorithm
SOME(dae) := odae;

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(dae, filenameprefix, "dae");
serializeNotify(graph, filenameprefix, "graph");
serializeNotify(cache, filenameprefix, "cache");
serializeNotify(SymbolTable.get(), filenameprefix, "st");
serializeNotify(dae, "FrontEnd DAE");
serializeNotify(graph, "FCore.Graph");
serializeNotify((graph,inEnv), "FCore.Graph + Old graph");
serializeNotify(cache, "FCore.Cache");
serializeNotify((cache,inCache), "FCore.Cache + Old cache");
serializeNotify(SymbolTable.get(), "Symbol Table (Absyn and SCode)");
serializeNotify((SymbolTable.get(),dae,graph,inEnv,cache,inCache), "Symbol Table, DAE, Graph, OldGraph, Cache, OldCache");
ExecStat.execStat("Serialize FrontEnd");
end if;

Expand All @@ -792,7 +795,7 @@ algorithm
ExecStat.execStat("Transformations before backend");

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(dae, filenameprefix, "dae2");
serializeNotify(dae, "FrontEnd DAE after transformations");
ExecStat.execStat("Serialize DAE (2)");
end if;

Expand All @@ -809,7 +812,7 @@ algorithm
GC.free(dae);

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(dlow, filenameprefix, "dlow");
serializeNotify(dlow, "BackendDAECreate.lower");
ExecStat.execStat("Serialize dlow");
end if;

Expand All @@ -836,9 +839,11 @@ algorithm
state := State.simcode;

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(dlow, filenameprefix, "simDAE");
serializeNotify(initDAE, filenameprefix, "initDAE");
serializeNotify(removedInitialEquationLst, filenameprefix, "removedInitialEquationLst");
serializeNotify(dlow, "BackendDAE (simulation)");
serializeNotify(initDAE, "BackendDAE (initialization)");
serializeNotify(initDAE_lambda0, "BackendDAE (lambda0)");
serializeNotify((dlow,initDAE,initDAE_lambda0), "BackendDAE (simulation+initialization+lambda0)");
serializeNotify(removedInitialEquationLst, "removedInitialEquationLst");
ExecStat.execStat("Serialize solved system");
end if;

Expand Down Expand Up @@ -941,9 +946,9 @@ algorithm
ExecStat.execStat("FrontEnd");

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(dae, filenameprefix, "dae");
serializeNotify(graph, filenameprefix, "graph");
serializeNotify(outCache, filenameprefix, "cache");
serializeNotify(dae, "dae");
serializeNotify(graph, "graph");
serializeNotify(outCache, "cache");
ExecStat.execStat("Serialize FrontEnd");
end if;

Expand All @@ -954,7 +959,7 @@ algorithm
ExecStat.execStat("Transformations before backend");

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(dae, filenameprefix, "dae2");
serializeNotify(dae, "dae2");
ExecStat.execStat("Serialize DAE (2)");
end if;

Expand All @@ -971,7 +976,7 @@ algorithm
GC.free(dae);

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(dlow, filenameprefix, "dlow");
serializeNotify(dlow, "dlow");
ExecStat.execStat("Serialize dlow");
end if;

Expand All @@ -982,9 +987,9 @@ algorithm
timeBackend := System.realtimeTock(ClockIndexes.RT_CLOCK_BACKEND);

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(bdae, filenameprefix, "simDAE");
serializeNotify(initDAE, filenameprefix, "initDAE");
serializeNotify(removedInitialEquationLst, filenameprefix, "removedInitialEquationLst");
serializeNotify(bdae, "simDAE");
serializeNotify(initDAE, "initDAE");
serializeNotify(removedInitialEquationLst, "removedInitialEquationLst");
ExecStat.execStat("Serialize solved system");
end if;

Expand Down Expand Up @@ -1275,7 +1280,7 @@ algorithm
ExecStat.execStat("SimCode");

if Flags.isSet(Flags.SERIALIZED_SIZE) then
serializeNotify(simCode, filenamePrefix, "simCode");
serializeNotify(simCode, "SimCode");
ExecStat.execStat("Serialize simCode");
end if;

Expand All @@ -1300,14 +1305,9 @@ end generateModelCodeDAE;

protected function serializeNotify<T>
input T data;
input String prefix;
input String name;
protected
Real fsize;
algorithm
Serializer.outputFile(data, prefix + "_"+name+".bin");
(,fsize,) := System.stat(prefix + "_"+name+".bin");
Error.addMessage(Error.SERIALIZED_SIZE, {name, StringUtil.bytesToReadableUnit(fsize)});
Error.addMessage(Error.SERIALIZED_SIZE, {name, StringUtil.bytesToReadableUnit(System.getSizeOfData(data))});
end serializeNotify;

annotation(__OpenModelica_Interface="backend");
Expand Down
2 changes: 1 addition & 1 deletion Compiler/Util/Error.mo
Expand Up @@ -1021,7 +1021,7 @@ public constant Message UNIONTYPE_MISSING_TYPEVARS = MESSAGE(5044, TRANSLATION()
public constant Message UNIONTYPE_WRONG_NUM_TYPEVARS = MESSAGE(5045, TRANSLATION(), ERROR(),
Util.gettext("Uniontype %s has %s type variables, but got %s."));
public constant Message SERIALIZED_SIZE = MESSAGE(5046, TRANSLATION(), NOTIFICATION(),
Util.gettext("%s has serialized size %s."));
Util.gettext("%s uses %s of memory."));

public constant Message COMPILER_ERROR = MESSAGE(5999, TRANSLATION(), ERROR(),
Util.notrans("%s"));
Expand Down
49 changes: 27 additions & 22 deletions Compiler/Util/ExecStat.mo
Expand Up @@ -34,28 +34,33 @@ protected
GC.ProfStats stats, oldStats;
algorithm
if Flags.isSet(Flags.EXEC_STAT) then
(stats as GC.PROFSTATS(bytes_allocd_since_gc=since, allocd_bytes_before_gc=before, heapsize_full=heapsize_full, free_bytes_full=free_bytes_full)) := GC.getProfStats();
memory := since+before;
oldStats := getGlobalRoot(Global.gcProfilingIndex);
GC.PROFSTATS(bytes_allocd_since_gc=since, allocd_bytes_before_gc=before) := oldStats;
oldMemory := since+before;
t := System.realtimeTock(ClockIndexes.RT_CLOCK_EXECSTAT);
total := System.realtimeTock(ClockIndexes.RT_CLOCK_EXECSTAT_CUMULATIVE);
timeStr := System.snprintff("%.4g", 20, t);
totalTimeStr := System.snprintff("%.4g", 20, total);
if Flags.isSet(Flags.GC_PROF) then
gcStr := GC.profStatsStr(stats, head="", delimiter=" / ");
Error.addMessage(Error.EXEC_STAT_GC, {name, timeStr, totalTimeStr, gcStr});
else
Error.addMessage(Error.EXEC_STAT, {name, timeStr, totalTimeStr,
StringUtil.bytesToReadableUnit(memory-oldMemory, maxSizeInUnit=500, significantDigits=4),
StringUtil.bytesToReadableUnit(memory, maxSizeInUnit=500, significantDigits=4),
StringUtil.bytesToReadableUnit(free_bytes_full, maxSizeInUnit=500, significantDigits=4),
StringUtil.bytesToReadableUnit(heapsize_full, maxSizeInUnit=500, significantDigits=4)
});
end if;
System.realtimeTick(ClockIndexes.RT_CLOCK_EXECSTAT);
setGlobalRoot(Global.gcProfilingIndex, stats);
for i in if Flags.isSet(Flags.EXEC_STAT_EXTRA_GC) then {1,2} else {1} loop
if i==2 then
GC.gcollect();
end if;
(stats as GC.PROFSTATS(bytes_allocd_since_gc=since, allocd_bytes_before_gc=before, heapsize_full=heapsize_full, free_bytes_full=free_bytes_full)) := GC.getProfStats();
memory := since+before;
oldStats := getGlobalRoot(Global.gcProfilingIndex);
GC.PROFSTATS(bytes_allocd_since_gc=since, allocd_bytes_before_gc=before) := oldStats;
oldMemory := since+before;
t := System.realtimeTock(ClockIndexes.RT_CLOCK_EXECSTAT);
total := System.realtimeTock(ClockIndexes.RT_CLOCK_EXECSTAT_CUMULATIVE);
timeStr := System.snprintff("%.4g", 20, t);
totalTimeStr := System.snprintff("%.4g", 20, total);
if Flags.isSet(Flags.GC_PROF) then
gcStr := GC.profStatsStr(stats, head="", delimiter=" / ");
Error.addMessage(Error.EXEC_STAT_GC, {name + (if i==2 then " GC" else ""), timeStr, totalTimeStr, gcStr});
else
Error.addMessage(Error.EXEC_STAT, {name + (if i==2 then " GC" else ""), timeStr, totalTimeStr,
StringUtil.bytesToReadableUnit(memory-oldMemory, maxSizeInUnit=500, significantDigits=4),
StringUtil.bytesToReadableUnit(memory, maxSizeInUnit=500, significantDigits=4),
StringUtil.bytesToReadableUnit(free_bytes_full, maxSizeInUnit=500, significantDigits=4),
StringUtil.bytesToReadableUnit(heapsize_full, maxSizeInUnit=500, significantDigits=4)
});
end if;
System.realtimeTick(ClockIndexes.RT_CLOCK_EXECSTAT);
setGlobalRoot(Global.gcProfilingIndex, stats);
end for;
end if;
end execStat;

Expand Down
5 changes: 4 additions & 1 deletion Compiler/Util/Flags.mo
Expand Up @@ -526,6 +526,8 @@ constant DebugFlag SUSAN_MATCHCONTINUE_DEBUG = DEBUG_FLAG(175, "susanDebug", fal
Util.gettext("Makes Susan generate code using try/else to better debug which function broke the expected match semantics."));
constant DebugFlag OLD_FE_UNITCHECK = DEBUG_FLAG(176, "oldFrontEndUnitCheck", false,
Util.gettext("Checks the consistency of units in equation (for the old front-end)."));
constant DebugFlag EXEC_STAT_EXTRA_GC = DEBUG_FLAG(177, "execstatGCcollect", false,
Util.gettext("When running execstat, also perform an extra full garbage collection."));

// This is a list of all debug flags, to keep track of which flags are used. A
// flag can not be used unless it's in this list, and the list is checked at
Expand Down Expand Up @@ -708,7 +710,8 @@ constant list<DebugFlag> allDebugFlags = {
IGNORE_CYCLES,
ALIAS_CONFLICTS,
SUSAN_MATCHCONTINUE_DEBUG,
OLD_FE_UNITCHECK
OLD_FE_UNITCHECK,
EXEC_STAT_EXTRA_GC
};

public
Expand Down
1 change: 1 addition & 0 deletions Compiler/Util/Serializer.mo
Expand Up @@ -41,6 +41,7 @@ encapsulated package Serializer
This package provides functions to serialize MetaModelica data.
The external C implementation is in TOP/Compiler/runtime/Serializer.c"

// Note: Reading back the data does not work as of 2018-02-19

public function outputFile<T> "
Prints the structure of the object."
Expand Down
9 changes: 9 additions & 0 deletions Compiler/Util/System.mo
Expand Up @@ -1301,5 +1301,14 @@ external "C" OpenModelica_updateUriMapping(OpenModelica.threadData(), namesAndDi
</html>"));
end updateUriMapping;

// Reports the memory footprint of an arbitrary MetaModelica value.
// The work is delegated to the external C function SystemImpl__getSizeOfData
// (linked from the omcruntime library).
function getSizeOfData<T>
input T data; // The value to measure; any type is accepted.
output Real sz; // Size in bytes, returned as a Real by the C implementation.
external "C" sz=SystemImpl__getSizeOfData(data) annotation(Library = {"omcruntime"}, Documentation(info="<html>
Counts the number of bytes that were allocated to hold the given data structure.
Includes constant data and handles cycles.
</html>"));
end getSizeOfData;

annotation(__OpenModelica_Interface="util");
end System;
1 change: 0 additions & 1 deletion Compiler/boot/LoadCompilerSources.mos
Expand Up @@ -395,7 +395,6 @@ if true then /* Suppress output */
"../Util/HashTableStringToUnit.mo",
"../Util/HashTableUnitToString.mo",
"../Util/PriorityQueue.mo",
"../Util/Serializer.mo",
"../Util/SimulationResults.mo",
"../Util/TaskGraphResults.mo"
};
Expand Down
3 changes: 2 additions & 1 deletion Compiler/runtime/Makefile.common
Expand Up @@ -31,10 +31,11 @@ OMC_OBJ_STUBS = corbaimpl_stub_omc.o

OMC_OBJ_BOOT = $(OMC_OBJ_SHARED) $(OMC_OBJ_STUBS)

OMC_OBJ = $(OMC_OBJ_SHARED) serializer.o \
OMC_OBJ = $(OMC_OBJ_SHARED) \
ptolemyio_omc.o SimulationResults_omc.o \
$(OMCCORBASRC)

# serializer.o # Disabled 2018-02-19: reading back the serialized data does not work
# Database_omc.o

all: install
Expand Down
59 changes: 58 additions & 1 deletion Compiler/runtime/systemimplmisc.cpp
Expand Up @@ -3,6 +3,8 @@


#include <string>
#include <unordered_set>
#include <stack>

using namespace std;

Expand Down Expand Up @@ -42,6 +44,61 @@ extern "C" {
return res;
}

}
/* Size of one GC allocation unit (a granule): two machine words. */
#define GC_GRANULE_BYTES (2*sizeof(void*))

/*
 * Rounds a requested allocation size up to the next multiple of
 * GC_GRANULE_BYTES, i.e. the number of bytes the GC actually hands out
 * for a request of `sz` bytes. A size that is already a multiple of the
 * granule (including 0) is returned unchanged.
 */
static inline size_t actualByteSize(size_t sz)
{
  const size_t remainder = sz % GC_GRANULE_BYTES;
  if (remainder == 0) {
    return sz;
  }
  /* Pad up to the end of the partially used granule */
  return sz + (GC_GRANULE_BYTES - remainder);
}
#include <stdio.h>
double SystemImpl__getSizeOfData(void *data)
{
size_t sz=0;
std::unordered_set<void*> handled;
std::stack<void*> work;
work.push(data);
while (!work.empty()) {
void *item = work.top();
work.pop();
if (handled.find(item) != handled.end()) {
continue;
}
handled.insert(item);
if (MMC_IS_IMMEDIATE(item)) {
/* Uses up zero space */
continue;
}
mmc_uint_t hdr = MMC_GETHDR(item);
if (MMC_HDR_IS_FORWARD(hdr) || hdr==MMC_NILHDR || hdr==MMC_NONEHDR) {
/* Uses up zero space */
continue;
}
if (hdr==MMC_REALHDR) {
sz += actualByteSize(sizeof(void*)+sizeof(double));
continue;
}
if (MMC_HDRISSTRING(hdr)) {
sz += actualByteSize(sizeof(void*)+MMC_STRLEN(item)+1);
continue;
}
if (MMC_HDRISSTRUCT(hdr)) {
mmc_uint_t slots = MMC_HDRSLOTS(hdr);
mmc_uint_t ctor = MMC_HDRCTOR(hdr);
sz += actualByteSize(sizeof(void*)*(slots+1));
// Push the sub-objects to the stack
for (int i = (ctor>=3 && ctor != MMC_ARRAY_TAG) ? 2 /* MM record description */ : 1; i <= slots; i++) {
void *ptr = (MMC_FETCH(MMC_OFFSET(MMC_UNTAGPTR(item), i)));
work.push(ptr);
}
continue;
}
fprintf(stderr, "abort... bytes=%d num items=%d\n", sz, handled.size());
printAny(item);
abort();
}
return sz;
}

}
2 changes: 2 additions & 0 deletions Compiler/runtime/systemimplmisc.h
Expand Up @@ -5,6 +5,8 @@ extern "C" {

char* _replace(const char* source_str, const char* search_str, const char* replace_str);

/* Returns the number of bytes allocated to hold `data`. The return type is
 * double so that this prototype matches the definition in systemimplmisc.cpp. */
double SystemImpl__getSizeOfData(void *data);

}

#endif

0 comments on commit 526b822

Please sign in to comment.