New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Increased memory usage #894
Comments
First step for reproducing the problem:

"""
This script has very predictable performance and memory usage, around 1.4GiB RSS on my system,
and does not reproduce the problem.
"""
import time
import os
from libertem.api import Context
def main():
    """Reproduction attempt: repeatedly pick one frame from a K2IS dataset.

    Memory usage here is flat and predictable (~1.4 GiB RSS), so this
    scripted path does NOT reproduce the growth seen through the GUI.
    """
    print(f"running as pid {os.getpid()}")
    ctx = Context()
    ds = ctx.load("k2is", path="/home/alex/Data/K2IS/Capture52/Capture52_.gtg")
    while True:
        start = time.time()
        analysis = ctx.create_pick_analysis(dataset=ds, x=7, y=16)
        ctx.run(analysis)
        elapsed = time.time() - start
        print(f"pick took {elapsed:.2f}s")
if __name__ == "__main__":
main()

Performing the same picking action in the GUI does show the problem, which points at an issue with the web API. With the GUI, pick-scrubbing increases the memory usage of the worker processes. The main process also grows slightly, which may be related to keeping results around for longer than needed (I'm aware of that issue and I think I know how to fix it).
For reference, here is a script that I used for debugging this:

import time
import os
import gc
import mmap
from pympler import muppy, summary
import numpy as np
from libertem.api import Context
from libertem.executor.inline import InlineJobExecutor
from libertem.executor.dask import DaskJobExecutor, cluster_spec
from libertem.udf.base import UDF, UDFTask
from libertem.udf.raw import PickUDF
def dump_udf_objgraph():
    """Locate live UDFTask instances and render their back-reference chain.

    Intended to run inside a dask worker process (via run_cpu_worker).
    Writes the reference graph to /tmp/refs.png for offline inspection.
    """
    tasks = [obj for obj in muppy.get_objects()
             if issubclass(type(obj), UDFTask)]
    print(f"objs: {len(tasks)}")
    import objgraph
    chain = objgraph.find_backref_chain(tasks[-1], objgraph.is_proper_module)
    print([type(link) for link in chain])
    # The third link is the object keeping the task alive; it exposes
    # cache_info()/__wrapped__, i.e. it looks like an lru_cache wrapper —
    # dump its stats and the wrapped function for inspection.
    suspect = chain[2]
    print(suspect)
    print(suspect.cache_info())
    print(suspect.__wrapped__)
    objgraph.show_chain(chain, filename='/tmp/refs.png')
def run_cpu_worker(ctx, fn):
    """Run *fn* on a CPU-capable dask worker and block for its result.

    The resource tags route the task to a worker advertising CPU compute
    and ndarray support, matching LiberTEM's worker resource scheme.
    """
    worker_resources = {"CPU": 1, 'compute': 1, 'ndarray': 1}
    future = ctx.executor.client.submit(fn, resources=worker_resources)
    return future.result()
def main():
    """Run 102 single-frame picks, then dump UDFTask back-references.

    Flip ``use_dask`` to compare the dask executor (where the growth was
    observed) against the inline executor.
    """
    print(f"running as pid {os.getpid()}")
    use_dask = True  # set False to debug with the inline executor instead
    if use_dask:
        spec = cluster_spec(cpus=[0], has_cupy=False, cudas=[])
        ctx = Context(executor=DaskJobExecutor.make_local(spec=spec))
    else:
        ctx = Context(executor=InlineJobExecutor())
    ds = ctx.load("k2is", path="/home/alex/Data/K2IS/Capture52/Capture52_.gtg")
    for frame_idx in range(102):
        # ROI selecting exactly one frame in flat navigation order:
        roi = np.zeros(ds.shape.nav, dtype=bool)
        roi.reshape((-1,))[frame_idx] = True
        ctx.run_udf(dataset=ds, udf=PickUDF(), roi=roi)
    run_cpu_worker(ctx, dump_udf_objgraph)
if __name__ == "__main__":
main()

And here is another one that loops indefinitely:

import time
import os
import gc
import mmap
import numpy as np
from pympler import muppy, summary
from libertem.api import Context
from libertem.executor.inline import InlineJobExecutor
from libertem.executor.dask import DaskJobExecutor, cluster_spec
from libertem.udf.raw import PickUDF
def profile_memory():
    """Summarize all live ``mmap.mmap`` objects in the current process.

    Returns a ``(pid, pympler summary, mmap count)`` tuple; meant to be
    executed on a worker via the dask client (see get_profile).
    """
    mmaps = muppy.filter(muppy.get_objects(), Type=mmap.mmap)
    return os.getpid(), summary.summarize(mmaps), len(mmaps)
def get_profile(ctx, old_profile=None):
    """Run profile_memory on a CPU worker and print the resulting summary.

    ``old_profile`` is currently unused; kept so a caller can hand in the
    previous round's summary (see the commented-out diffing in main).
    Returns the fresh summary.
    """
    future = ctx.executor.client.submit(profile_memory, resources={
        "CPU": 1, 'compute': 1, 'ndarray': 1
    })
    _pid, summ, mmap_count = future.result()
    summary.print_(summ, sort='#')
    print(mmap_count)
    return summ
def main():
    """Loop forever, picking every nav position one frame at a time.

    The commented-out lines are debug hooks: forcing GC on the workers and
    diffing mmap summaries between rounds while chasing the memory growth.
    """
    print(f"running as pid {os.getpid()}")
    use_dask = True  # set False to compare against the inline executor
    if use_dask:
        spec = cluster_spec(cpus=[0], has_cupy=False, cudas=[])
        ctx = Context(executor=DaskJobExecutor.make_local(spec=spec))
    else:
        ctx = Context(executor=InlineJobExecutor())
    ds = ctx.load("k2is", path="/home/alex/Data/K2IS/Capture52/Capture52_.gtg")
    counter = 0
    old_summ = get_profile(ctx)  # baseline snapshot; diffing below is disabled
    while True:
        counter += 1
        for y in range(ds.shape.nav[0]):
            for x in range(ds.shape.nav[1]):
                start = time.time()
                roi = np.zeros(ds.shape.nav, dtype=bool)
                roi[y, x] = True
                ctx.run_udf(dataset=ds, udf=PickUDF(), roi=roi)
                elapsed = time.time() - start
                # ctx.executor.client.run(gc.collect)
                print(f"round {counter} pick took {elapsed:.2f}s")
                # summ = get_profile(ctx, old_summ)
                # old_summ = summ
if __name__ == "__main__":
main()

Both scripts need pympler installed for the memory profiling.
Compared to 0.5, we are using a bit more memory (see discussion in #814). We need to track memory usage and see if we can fix this easily - I suspect this can be a bit harder to fix, as we can't easily keep buffers between processing partitions, for example.
The text was updated successfully, but these errors were encountered: