Skip to content

Commit

Permalink
Code review: 340970043: Changed CPU time profiler to store samples in…
Browse files Browse the repository at this point in the history
…stead of summary log2timeline#1779
  • Loading branch information
joachimmetz committed Mar 30, 2018
1 parent 6b6adef commit 2ecf6f8
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 61 deletions.
4 changes: 2 additions & 2 deletions config/dpkg/changelog
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
plaso (20180329-1) unstable; urgency=low
plaso (20180330-1) unstable; urgency=low

* Auto-generated

-- Log2Timeline <log2timeline-dev@googlegroups.com> Thu, 29 Mar 2018 19:14:03 +0200
-- Log2Timeline <log2timeline-dev@googlegroups.com> Fri, 30 Mar 2018 07:24:16 +0200
2 changes: 1 addition & 1 deletion plaso/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@

from __future__ import unicode_literals

__version__ = '20180329'
__version__ = '20180330'
78 changes: 34 additions & 44 deletions plaso/engine/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import unicode_literals

import abc
import gzip
import os
import time

Expand All @@ -13,40 +14,29 @@
hpy = None


class CPUTimeMeasurements(object):
"""The CPU time measurements.
class CPUTimeMeasurement(object):
"""The CPU time measurement.
Attributes:
number_of_samples (int): number of samples.
total_cpu_time (int): total CPU time measured by the samples.
total_system_time (int): total system time measured by the samples.
start_cpu_time (int): start CPU time.
total_cpu_time (int): total CPU time.
"""

def __init__(self):
"""Initializes the CPU time measurements object."""
super(CPUTimeMeasurements, self).__init__()
self._cpu_time = None
self._system_time = None
self.number_of_samples = 0
self.total_cpu_time = 0
self.total_system_time = 0
"""Initializes the CPU time measurement."""
super(CPUTimeMeasurement, self).__init__()
self.start_cpu_time = None
self.total_cpu_time = None

def SampleStart(self):
"""Starts measuring the CPU and system time."""
self._cpu_time = time.clock()
self._system_time = time.time()
self.start_cpu_time = time.clock()
self.total_cpu_time = 0

def SampleStop(self):
"""Stops the current measurement and adds the sample."""
if self._cpu_time is None or self._system_time is None:
return

self.total_cpu_time += time.clock() - self._cpu_time
self.total_system_time += time.time() - self._system_time
self.number_of_samples += 1

self._cpu_time = None
self._system_time = None
if self.start_cpu_time is not None:
self.total_cpu_time += time.clock() - self.start_cpu_time


class CPUTimeProfiler(object):
Expand All @@ -64,6 +54,7 @@ def __init__(self, identifier, path=None):
"""
super(CPUTimeProfiler, self).__init__()
self._identifier = identifier
self._path = path
self._profile_measurements = {}
self._sample_file = '{0:s}-{1!s}.csv'.format(
self._FILENAME_PREFIX, identifier)
Expand All @@ -78,7 +69,7 @@ def StartTiming(self, profile_name):
profile_name (str): name of the profile to sample.
"""
if profile_name not in self._profile_measurements:
self._profile_measurements[profile_name] = CPUTimeMeasurements()
self._profile_measurements[profile_name] = CPUTimeMeasurement()

self._profile_measurements[profile_name].SampleStart()

Expand All @@ -88,30 +79,29 @@ def StopTiming(self, profile_name):
Args:
profile_name (str): name of the profile to sample.
"""
if profile_name not in self._profile_measurements:
return

self._profile_measurements[profile_name].SampleStop()
measurements = self._profile_measurements.get(profile_name)
if measurements:
measurements.SampleStop()

def Write(self):
"""Writes the CPU time measurements to a sample file."""
try:
os.remove(self._sample_file)
except OSError:
pass
sample = '{0:f}\t{1:s}\t{2:f}\n'.format(
measurements.start_cpu_time, profile_name,
measurements.total_cpu_time)
self._sample_file.write(sample)

with open(self._sample_file, 'wb') as file_object:
line = (
'profile name\tnumber of samples\ttotal CPU time\t'
'total system time\n')
file_object.write(line.encode('utf-8'))
def Start(self):
"""Starts the profiler."""
filename = '{0:s}-{1:s}.csv.gz'.format(
self._FILENAME_PREFIX, self._identifier)
if self._path:
filename = os.path.join(self._path, filename)

for name, measurements in iter(self._profile_measurements.items()):
line = '{0:s}\t{1!s}\t{2!s}\t{3!s}\n'.format(
name, measurements.number_of_samples,
measurements.total_cpu_time, measurements.total_system_time)
self._sample_file = gzip.open(filename, 'wb')
self._sample_file.write('CPU time\tName\tProcessing time\n')

file_object.write(line.encode('utf-8'))
def Stop(self):
"""Stops the profiler."""
self._sample_file.close()
self._sample_file = None


class BaseMemoryProfiler(object):
Expand Down
13 changes: 8 additions & 5 deletions plaso/engine/single_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,8 @@ def _StartProfiling(self, extraction_worker):
return

if self._processing_configuration.profiling.HaveProfileMemoryGuppy():
identifier = '{0:s}-memory'.format(self._name)
self._guppy_memory_profiler = profiler.GuppyMemoryProfiler(
identifier, path=self._processing_configuration.profiling.directory,
self._name, path=self._processing_configuration.profiling.directory,
profiling_sample_rate=(
self._processing_configuration.profiling.sample_rate))
self._guppy_memory_profiler.Start()
Expand All @@ -199,17 +198,20 @@ def _StartProfiling(self, extraction_worker):
self._parsers_profiler = profiler.ParsersProfiler(
identifier, path=self._processing_configuration.profiling.directory)
extraction_worker.SetParsersProfiler(self._parsers_profiler)
self._parsers_profiler.Start()

if self._processing_configuration.profiling.HaveProfileProcessing():
identifier = '{0:s}-processing'.format(self._name)
self._processing_profiler = profiler.ProcessingProfiler(
identifier, path=self._processing_configuration.profiling.directory)
extraction_worker.SetProcessingProfiler(self._processing_profiler)
self._processing_profiler.Start()

if self._processing_configuration.profiling.HaveProfileSerializers():
identifier = '{0:s}-serializers'.format(self._name)
self._serializers_profiler = profiler.SerializersProfiler(
identifier, path=self._processing_configuration.profiling.directory)
self._serializers_profiler.Start()

def _StopProfiling(self, extraction_worker):
"""Stops profiling.
Expand All @@ -219,20 +221,21 @@ def _StopProfiling(self, extraction_worker):
"""
if self._guppy_memory_profiler:
self._guppy_memory_profiler.Sample()
self._guppy_memory_profiler.Stop()
self._guppy_memory_profiler = None

if self._parsers_profiler:
extraction_worker.SetParsersProfiler(None)
self._parsers_profiler.Write()
self._parsers_profiler.Stop()
self._parsers_profiler = None

if self._processing_profiler:
extraction_worker.SetProcessingProfiler(None)
self._processing_profiler.Write()
self._processing_profiler.Stop()
self._processing_profiler = None

if self._serializers_profiler:
self._serializers_profiler.Write()
self._serializers_profiler.Stop()
self._serializers_profiler = None

def _UpdateStatus(
Expand Down
8 changes: 5 additions & 3 deletions plaso/multi_processing/psort.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,11 +645,13 @@ def _StartProfiling(self):
identifier = '{0:s}-processing'.format(self._name)
self._processing_profiler = profiler.ProcessingProfiler(
identifier, path=self._profiling_configuration.directory)
self._processing_profiler.Start()

if self._profiling_configuration.HaveProfileSerializers():
identifier = '{0:s}-serializers'.format(self._name)
self._serializers_profiler = profiler.SerializersProfiler(
identifier, path=self._profiling_configuration.directory)
self._serializers_profiler.Start()

def _StatusUpdateThreadMain(self):
"""Main function of the status update thread."""
Expand Down Expand Up @@ -724,17 +726,17 @@ def _StopProfiling(self):
"""Stops profiling."""
if self._guppy_memory_profiler:
self._guppy_memory_profiler.Sample()
self._guppy_memory_profiler.Stop()
self._guppy_memory_profiler = None

if self._processing_profiler:
self._processing_profiler.Write()
self._processing_profiler.Stop()
self._processing_profiler = None

if self._serializers_profiler:
self._serializers_profiler.Write()
self._serializers_profiler.Stop()
self._serializers_profiler = None


def _UpdateProcessingStatus(self, pid, process_status, used_memory):
"""Updates the processing status.
Expand Down
7 changes: 5 additions & 2 deletions plaso/multi_processing/task_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,11 +529,13 @@ def _StartProfiling(self):
identifier = '{0:s}-processing'.format(self._name)
self._processing_profiler = profiler.ProcessingProfiler(
identifier, path=self._processing_configuration.profiling.directory)
self._processing_profiler.Start()

if self._processing_configuration.profiling.HaveProfileSerializers():
identifier = '{0:s}-serializers'.format(self._name)
self._serializers_profiler = profiler.SerializersProfiler(
identifier, path=self._processing_configuration.profiling.directory)
self._serializers_profiler.Start()

def _StatusUpdateThreadMain(self):
"""Main function of the status update thread."""
Expand Down Expand Up @@ -598,14 +600,15 @@ def _StopProfiling(self):
"""Stops profiling."""
if self._guppy_memory_profiler:
self._guppy_memory_profiler.Sample()
self._guppy_memory_profiler.Stop()
self._guppy_memory_profiler = None

if self._processing_profiler:
self._processing_profiler.Write()
self._processing_profiler.Stop()
self._processing_profiler = None

if self._serializers_profiler:
self._serializers_profiler.Write()
self._serializers_profiler.Stop()
self._serializers_profiler = None

def _UpdateForemanProcessStatus(self):
Expand Down
10 changes: 7 additions & 3 deletions plaso/multi_processing/worker_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,36 +298,40 @@ def _StartProfiling(self):
self._parsers_profiler = profiler.ParsersProfiler(
identifier, path=self._processing_configuration.profiling.directory)
self._extraction_worker.SetParsersProfiler(self._parsers_profiler)
self._parsers_profiler.Start()

if self._processing_configuration.profiling.HaveProfileProcessing():
identifier = '{0:s}-processing'.format(self._name)
self._processing_profiler = profiler.ProcessingProfiler(
identifier, path=self._processing_configuration.profiling.directory)
self._extraction_worker.SetProcessingProfiler(self._processing_profiler)
self._processing_profiler.Start()

if self._processing_configuration.profiling.HaveProfileSerializers():
identifier = '{0:s}-serializers'.format(self._name)
self._serializers_profiler = profiler.SerializersProfiler(
identifier, path=self._processing_configuration.profiling.directory)
self._serializers_profiler.Start()

def _StopProfiling(self):
"""Stops profiling."""
if self._guppy_memory_profiler:
self._guppy_memory_profiler.Sample()
self._guppy_memory_profiler.Stop()
self._guppy_memory_profiler = None

if self._parsers_profiler:
self._extraction_worker.SetParsersProfiler(None)
self._parsers_profiler.Write()
self._parsers_profiler.Stop()
self._parsers_profiler = None

if self._processing_profiler:
self._extraction_worker.SetProcessingProfiler(None)
self._processing_profiler.Write()
self._processing_profiler.Stop()
self._processing_profiler = None

if self._serializers_profiler:
self._serializers_profiler.Write()
self._serializers_profiler.Stop()
self._serializers_profiler = None

def SignalAbort(self):
Expand Down
4 changes: 3 additions & 1 deletion tests/engine/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ def testCPUTimeProfiler(self):
test_profiler = profiler.CPUTimeProfiler(
'unittest', path=temp_directory)

test_profiler.Start()

for _ in range(5):
test_profiler.StartTiming('test_profile')
time.sleep(0.01)
test_profiler.StopTiming('test_profile')

test_profiler.Write()
test_profiler.Stop()


# Note that this test can be extremely slow with guppy version 0.1.9
Expand Down

0 comments on commit 2ecf6f8

Please sign in to comment.