Permalink
Browse files

Add option to sampling profiler to additionally sample memory.

Summary:
This adds an option to additionally sample memory.  Since memory sampling is
slow on prod, it does it fairly infrequently.  (After some further profiling,
we may be able to turn it up a bit.)

Currently it does nothing on a dev server, since the dev server does not allow
importing the python module, and does not return useful data from the prod
runtime API.

Screenshots:
{F23421}
{F23422}

Test Plan:
Set the dev server fallback to return random data instead of zero, to check
that the interface work.  Test the profiler in the dev shell.  Will also test
on staging.

Reviewers: alan

Reviewed By: alan

Differential Revision: http://phabricator.khanacademy.org/D10320
  • Loading branch information...
1 parent bfba282 commit 6f69a2d820f83288276cfa9dfe7c4515954582d2 @benjaminjkraft benjaminjkraft committed Jun 24, 2014
Showing with 157 additions and 16 deletions.
  1. +24 −9 profiler.py
  2. +63 −5 sampling_profiler.py
  3. +10 −0 static/css/profiler.css
  4. +46 −2 static/js/profiler.js
  5. +10 −0 static/js/template.tmpl
  6. +4 −0 util.py
View
@@ -33,9 +33,6 @@
import config
import util
-dev_server = os.environ["SERVER_SOFTWARE"].startswith("Devel")
-
-
class CurrentRequestId(object):
"""A per-request identifier accessed by other pieces of mini profiler.
@@ -50,14 +47,14 @@ class CurrentRequestId(object):
@staticmethod
def get():
- if dev_server:
+ if util.dev_server:
return CurrentRequestId.dev_server_request_id
else:
return CurrentRequestId._local.request_id
@staticmethod
def set(request_id):
- if dev_server:
+ if util.dev_server:
CurrentRequestId.dev_server_request_id = request_id
else:
CurrentRequestId._local.request_id = request_id
@@ -79,10 +76,13 @@ class Mode(object):
SIMPLE = "simple" # Simple start/end timing for the request as a whole
CPU_INSTRUMENTED = "instrumented" # Profile all function calls
CPU_SAMPLING = "sampling" # Sample call stacks
+ CPU_MEMORY_SAMPLING = "memory_sampling" # Sample call stacks and memory
CPU_LINEBYLINE = "linebyline" # Line-by-line profiling on a subset of functions
RPC_ONLY = "rpc" # Profile all RPC calls
RPC_AND_CPU_INSTRUMENTED = "rpc_instrumented" # RPCs and all fxn calls
RPC_AND_CPU_SAMPLING = "rpc_sampling" # RPCs and sample call stacks
+ RPC_AND_CPU_MEMORY_SAMPLING = "rpc_memory_sampling" # RPCs and sample call
+ # stacks and memory
RPC_AND_CPU_LINEBYLINE = "rpc_linebyline" # RPCs and line-by-line profiling
@staticmethod
@@ -98,10 +98,12 @@ def get_mode(environ):
Mode.SIMPLE,
Mode.CPU_INSTRUMENTED,
Mode.CPU_SAMPLING,
+ Mode.CPU_MEMORY_SAMPLING,
Mode.CPU_LINEBYLINE,
Mode.RPC_ONLY,
Mode.RPC_AND_CPU_INSTRUMENTED,
Mode.RPC_AND_CPU_SAMPLING,
+ Mode.RPC_AND_CPU_MEMORY_SAMPLING,
Mode.RPC_AND_CPU_LINEBYLINE]):
mode = Mode.RPC_AND_CPU_INSTRUMENTED
@@ -112,13 +114,22 @@ def is_rpc_enabled(mode):
return mode in [
Mode.RPC_ONLY,
Mode.RPC_AND_CPU_INSTRUMENTED,
- Mode.RPC_AND_CPU_SAMPLING]
+ Mode.RPC_AND_CPU_SAMPLING,
+ Mode.RPC_AND_CPU_MEMORY_SAMPLING]
@staticmethod
def is_sampling_enabled(mode):
return mode in [
Mode.CPU_SAMPLING,
- Mode.RPC_AND_CPU_SAMPLING]
+ Mode.CPU_MEMORY_SAMPLING,
+ Mode.RPC_AND_CPU_SAMPLING,
+ Mode.RPC_AND_CPU_MEMORY_SAMPLING]
+
+ @staticmethod
+ def is_memory_sampling_enabled(mode):
+ return mode in [
+ Mode.CPU_MEMORY_SAMPLING,
+ Mode.RPC_AND_CPU_MEMORY_SAMPLING]
@staticmethod
def is_instrumented_enabled(mode):
@@ -218,7 +229,7 @@ def get(self):
# Log fetching doesn't work on the dev server and this data isn't
# relevant in dev server's case, so we return a simple fake response.
- if dev_server:
+ if util.dev_server:
dict_request_log = {
"pending_ms": 0,
"loading_request": False,
@@ -430,7 +441,11 @@ def profile_start_response(self, app, environ, start_response):
# this file so we don't bring in a lot of imports for users who
# don't have the profiler enabled.
from . import sampling_profiler
- self.sampling_prof = sampling_profiler.Profile()
+ if Mode.is_memory_sampling_enabled(self.mode):
+ self.sampling_prof = sampling_profiler.Profile(
+ memory_sample_rate=5)
+ else:
+ self.sampling_prof = sampling_profiler.Profile()
result_fxn_wrapper = self.sampling_prof.run
elif Mode.is_linebyline_enabled(self.mode):
View
@@ -26,6 +26,31 @@
from . import util
+
+def get_memory():
+ if util.dev_server:
+ try:
+ # This will work in a dev shell, but will raise an error on
+ # a dev server. We convert to MB for consistency with prod.
+ #
+ # TODO(benkraft): Hack the dev server to allow the import.
+ # It prohibits any import that wouldn't be allowed on prod,
+ # but here we would actually like to be able to do the
+ # import anyway, since we already do things differently on
+ # prod.
+ #
+ # TODO(benkraft): Craig thinks the live runtime API is
+ # actually reporting VSS, not RSS, so maybe we should use
+ # that for consistency. Better yet, use both.
+ import resource
+ return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.
+ except:
+ return 0
+ else:
+ # This will work anywhere, but will return 0 on dev. It involves an RPC.
+ return runtime.memory_usage().current()
+
+
class InspectingThread(threading.Thread):
"""Thread that periodically triggers profiler inspections."""
SAMPLES_PER_SECOND = 250
@@ -57,11 +82,13 @@ def run(self):
end, but the samples may not be perfectly even."""
next_sample_time_seconds = time.time()
+ sample_number = 0
# Keep sampling until this thread is explicitly stopped.
while not self.should_stop():
# Take a sample of the main request thread's frame stack...
- self.profile.take_sample()
+ self.profile.take_sample(sample_number)
+ sample_number += 1
# ...then sleep and let it do some more work.
next_sample_time_seconds += (
@@ -104,11 +131,29 @@ def get_frame_descriptions(self):
class Profile(object):
- """Profiler that periodically inspects a request and logs stack traces."""
- def __init__(self):
+ """Profiler that periodically inspects a request and logs stack traces.
+
+ If memory_sample_rate is nonzero, approximately that many samples per
+ second will also profile current memory usage. Note that on prod, this
+ involves an RPC, so running more than 5 or 10 samples per second is not
+ recommended.
+ """
+ def __init__(self, memory_sample_rate=0):
+ # Every self.memory_sample_every'th sample will also record memory. We
+ # want this to be such that this will add up to memory_sample_rate
+ # samples per second (approximately).
+ if memory_sample_rate:
+ self.memory_sample_every = max(1, int(round(
+ InspectingThread.SAMPLES_PER_SECOND / memory_sample_rate)))
+ else:
+ self.memory_sample_every = 0
+
# All saved stack trace samples
self.samples = []
+ # All saved memory samples in MB, by timestamp_ms
+ self.memory_samples = {}
+
# Thread id for the request thread currently being profiled
self.current_request_thread_id = None
@@ -137,18 +182,29 @@ def results(self):
samples = [{
"timestamp_ms": util.milliseconds_fmt(sample.timestamp_ms, 1),
+ "memory_used": self.memory_samples.get(sample.timestamp_ms),
"stack_frames": [frame_indexes[desc]
for desc in sample.get_frame_descriptions()]
} for sample in self.samples]
+ # For convenience, we also send along with each sample the index
+ # of the previous memory sample.
+ if self.memory_sample_every:
+ prev_memory_sample_index = 0
+ for i, sample in enumerate(samples):
+ sample['prev_memory_sample_index'] = prev_memory_sample_index
+ if sample['memory_used'] is not None:
+ prev_memory_sample_index = i
+
return {
"frame_names": [
util.short_method_fmt(frame) for frame in frames],
"samples": samples,
"total_samples": total_samples,
}
- def take_sample(self):
+ def take_sample(self, sample_number):
+ timestamp_ms = (time.time() - self.start_time) * 1000
# Look at stacks of all existing threads...
# See http://bzimmer.ziclix.com/2008/12/17/python-thread-dumps/
for thread_id, active_frame in sys._current_frames().items():
@@ -158,9 +214,11 @@ def take_sample(self):
# profile more than one of them.
if thread_id == self.current_request_thread_id:
# Grab a sample of this thread's current stack
- timestamp_ms = (time.time() - self.start_time) * 1000
self.samples.append(ProfileSample.from_frame_and_timestamp(
active_frame, timestamp_ms))
+ if self.memory_sample_every:
+ if sample_number % self.memory_sample_every == 0:
+ self.memory_samples[timestamp_ms] = get_memory()
def run(self, fxn):
"""Run function with samping profiler enabled, saving results."""
View
@@ -419,6 +419,16 @@
vertical-align: middle;
}
+.g-m-p .sample-memory-display {
+ width: 300px;
+ display: inline-block;
+ vertical-align: middle;
+}
+
+.g-m-p .sample-memory{
+ padding-right: 10px;
+}
+
.g-m-p .ignore-frames-display {
width: 100px;
display: inline-block;
View
@@ -9,10 +9,12 @@ var GaeMiniProfiler = {
SIMPLE: "simple",
CPU_INSTRUMENTED: "instrumented",
CPU_SAMPLING: "sampling",
+ CPU_MEMORY_SAMPLING: "memory_sampling",
CPU_LINEBYLINE: "linebyline",
RPC_ONLY: "rpc",
RPC_AND_CPU_INSTRUMENTED: "rpc_instrumented",
RPC_AND_CPU_SAMPLING: "rpc_sampling",
+ RPC_AND_CPU_MEMORY_SAMPLING: "rpc_memory_sampling",
RPC_AND_CPU_LINEBYLINE: "rpc_linebyline"
},
@@ -86,7 +88,8 @@ var GaeMiniProfiler = {
isRpcEnabled: function(mode) {
return (mode == this.modes.RPC_ONLY ||
mode == this.modes.RPC_AND_CPU_INSTRUMENTED ||
- mode == this.modes.RPC_AND_CPU_SAMPLING);
+ mode == this.modes.RPC_AND_CPU_SAMPLING ||
+ mode == this.modes.RPC_AND_CPU_MEMORY_SAMPLING);
},
/**
@@ -102,7 +105,17 @@ var GaeMiniProfiler = {
*/
isSamplingEnabled: function(mode) {
return (mode == this.modes.CPU_SAMPLING ||
- mode == this.modes.RPC_AND_CPU_SAMPLING);
+ mode == this.modes.CPU_MEMORY_SAMPLING ||
+ mode == this.modes.RPC_AND_CPU_SAMPLING ||
+ mode == this.modes.RPC_AND_CPU_MEMORY_SAMPLING);
+ },
+
+ /**
+ * True if profiler mode has enabled memory sampling
+ */
+ isMemorySamplingEnabled: function(mode) {
+ return (mode == this.modes.CPU_MEMORY_SAMPLING ||
+ mode == this.modes.RPC_AND_CPU_MEMORY_SAMPLING);
},
/**
@@ -439,6 +452,8 @@ var GaeMiniProfiler = {
var jSlider = searchRoot.find(".sample-number-slider");
var jTable = searchRoot.find(".sample-table");
var jSampleTimestamp = searchRoot.find(".sample-timestamp");
+ var jSampleMemory = searchRoot.find(".sample-memory");
+ var jSampleMemoryIncrease = searchRoot.find(".sample-memory-increase");
var jIgnoreFramesInput = searchRoot.find(".ignore-frames-slider");
var jIgnoredFrames = searchRoot.find(".sample-num-frames-ignored");
var jTableBody = jTable.find("tbody");
@@ -454,6 +469,35 @@ var GaeMiniProfiler = {
jSampleTimestamp.html(samples[sampleIndex].timestamp_ms + "ms");
jIgnoredFrames.html(minFrameToDisplay);
+ if (jSampleMemory) {
+ if (samples[sampleIndex].memory_used) {
+ var memoryIndex = sampleIndex;
+ var sampledAt = "";
+ } else {
+ var memoryIndex = samples[sampleIndex].prev_memory_sample_index;
+ var sampledAt = ("last sampled at " +
+ samples[memoryIndex].timestamp_ms + "ms");
+ }
+
+ var memoryUsed = samples[memoryIndex].memory_used
+ jSampleMemory.html(Math.round(memoryUsed * 100) / 100 + " MB");
+ if (memoryIndex > 0) {
+ var prevIndex = samples[memoryIndex].prev_memory_sample_index;
+ var diff = memoryUsed - samples[prevIndex].memory_used;
+ diff = Math.round(diff * 100) / 100;
+ if (diff >= 0) {
+ diff = "+" + diff;
+ }
+ sampledAt = ", " + sampledAt
+ jSampleMemoryIncrease.html(
+ "(" + diff + " MB" + sampledAt + ")");
+ } else if (sampledAt) {
+ jSampleMemoryIncrease.html("(" + sampledAt + ")");
+ } else {
+ jSampleMemoryIncrease.html("");
+ }
+ }
+
GaeMiniProfiler.buildSampleTable(jTable,
samples[sampleIndex].stack_frames, frameNames,
minFrameToDisplay);
View
@@ -49,6 +49,7 @@
<td>
<input type="radio" id="cpu_instrumented" name="cpu" value="instrumented"/><label for="cpu_instrumented"> instrumented</label><br>
<input type="radio" id="cpu_sampling" name="cpu" value="sampling"/><label for="cpu_sampling"> sampling</label><br>
+ <input type="radio" id="cpu_memory_sampling" name="cpu" value="memory_sampling"/><label for="cpu_memory_sampling"> sampling with memory</label><br>
<input type="radio" id="cpu_linebyline" name="cpu" value="linebyline"/><label for="cpu_linebyline"> line-by-line</label><br>
<input type="radio" id="cpu_disabled" name="cpu" value=""/><label for="cpu_disabled"> disabled</label>
</td>
@@ -180,6 +181,15 @@
${profiler_results.samples[profiler_results.total_samples - 1].timestamp_ms}ms
</span>
</div>
+ {{if GaeMiniProfiler.isMemorySamplingEnabled(mode)}}
+ <div>
+ <span class="sample-memory-display">
+ <b>Memory:</b>
+ <span class="sample-memory"></span>
+ <span class="sample-memory-increase"></span>
+ </span>
+ </div>
+ {{/if}}
<div>
<span class="ignore-frames-display"><b>Hide frames:</b>
<span class="sample-num-frames-ignored"></span></span></span>
View
@@ -1,3 +1,7 @@
+import os
+
+dev_server = os.environ["SERVER_SOFTWARE"].startswith("Devel")
+
def seconds_fmt(f, n=0):
return milliseconds_fmt(f * 1000, n)

0 comments on commit 6f69a2d

Please sign in to comment.