Initial commit.

Crunch-io · Jul 3, 2019 · 31783f7 · 31783f7
1 parent 8ef6c1b
commit 31783f7
Show file tree

Hide file tree

Showing 11 changed files with 1,762 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,35 @@
+*.py[cod]
+*.orig
+
+# Packages
+*.egg
+*.egg-info
+dist
+build
+eggs
+parts
+bin
+local
+var
+sdist
+develop-eggs
+.installed.cfg
+lib64
+venv
+
+# Installer logs
+pip-log.txt
+
+# Unit test / coverage reports
+.coverage.*
+.tox
+nosetests.xml
+junit.xml
+
+# Translations
+*.mo
+
+.cache
+
+# PyTest
+.pytest_cache/
diff --git a/README.md b/README.md
@@ -1,2 +1,73 @@
-# probes
-A system to Instrument running Python code
+# Probes
+
+A library for instrumenting Python code at runtime.
+
+Structured logs, metrics, and observability are great, but they almost always
+require you to alter your code, which interrupts the flow when reading it.
+Altering code also typically requires a build cycle; it's no fun making a ticket,
+getting review, waiting for a build and deploy, and then doing it all again
+to back out your temporary additions. This is doubly true when doing research,
+where you might perform a dozen small experiments to measure your live code.
+
+This library allows you to dynamically add probes at runtime instead.
+Probes are:
+    * reliable: errors will never affect your production code
+    * ephemeral: set a "lifespan" (in minutes), after which point the probe detaches
+    * comprehensive: all references to the target function are instrumented
+    * fast: measure most functions with fast local lookups; uses hunter
+      (in Cython) for more invasive internal probes.
+
+Individual probes can be created directly with the FunctionProbe class:
+
+```python
+>>> from path.to.module import myclass
+>>> myclass().add13(arg=5)
+18
+>>> p = probes.FunctionProbe("path.to.module.myclass.add13")
+>>> p.instruments["foo"] = probes.LogInstrument("foo", "arg", internal=False)
+>>> p.start()
+>>> myclass().add13(arg=5)
+Probe (foo) = 5
+18
+```
+
+Managers
+--------
+
+In a running system, we want to add, remove, start, and stop probes without
+having to code at an interactive prompt or restart the system; we do this
+with a ProbeManager. Start by configuring the global probes.manager:
+
+```python
+>>> probes.manager.instrument_classes = {
+    "log": LogInstrument,
+    "hist": MyHistogramInstrument,
+    "incr": MyIncrementInstrument,
+}
+>>> probes.manager.global_namespace.update({"foo": foo})
+```
+
+Later, you can define probes:
+
+```python
+>>> probes.manager.specs["probe-1"] = {
+    "target": "myapp.module.file.class.method",
+    "instrument": {
+        "type": "log",
+        "name": "myapp.method",
+        "value": "result",
+        "internal": False,
+        "custom": {},
+    },
+    "lifespan": 10,
+    "lastmodified": datetime.datetime.utcnow(),
+    "applied": {},
+}
+```
+
+Then call `probes.manager.apply()`, either when you add a probe, or on a
+schedule if your store is in MongoDB and the process defining probes is not
+the target process.
+
+The `applied` dictionary will be filled with information about which processes
+have applied the probe, and whether they encountered any errors.
diff --git a/setup.py b/setup.py
@@ -0,0 +1,17 @@
+from setuptools import find_packages, setup
+
+# No extension modules are declared; the empty list is passed to setup() below.
+ext_modules = []
+
+
+# Package metadata and layout for the "probes" distribution.
+setup(
+    name="probes",
+    version="1.0",
+    author="Robert Brewer",
+    author_email="dev@crunch.io",
+    # Packages are discovered under the "src" directory, which is also
+    # mapped as the root package directory.
+    packages=find_packages(where="src"),
+    package_dir={"": "src"},
+    include_package_data=True,
+    # Dependencies installed alongside the package.
+    install_requires=["hunter>=2.2.0", "mock"],
+    ext_modules=ext_modules,
+    # No console scripts or plugin entry points are registered.
+    entry_points={},
+)
diff --git a/src/probes/__init__.py b/src/probes/__init__.py
@@ -0,0 +1,11 @@
+"""Probes, a library for instrumenting code at runtime."""
+
+from .managers import ProbeManager
+from . import instruments
+from .probelib import FunctionProbe
+
+# The default ProbeManager instance, created at import time.
+# It is a module-level global since there should be one per process.
+# You _may_ make another, but most people will just want the one.
+manager = ProbeManager()
+
+# Names exported by `from probes import *`.
+__all__ = ("FunctionProbe", "manager", "ProbeManager", "instruments")
diff --git a/src/probes/instruments.py b/src/probes/instruments.py
@@ -0,0 +1,172 @@
+"""Instruments which receive probe events."""
+
+import datetime
+import sys
+
+try:
+    from statsd import statsd
+except ImportError:
+    statsd = None
+
+
+# Sentinel meaning "name not found in the local context"; distinct from None,
+# which is a legitimate value for a local variable.
+omitted = object()
+
+
+class Instrument(object):
+    """An instrument which receives FunctionProbe events.
+
+    Attributes:
+        * name: a name for the instrument; may be used in output, such as
+                when constructing statsd metric names.
+        * value: a Python expression to be evaluated; its result is the
+                 "process variable" to be used as the instrument sees fit.
+        * internal: If True, evaluate the value in the context of the
+                    wrapped function (just before it returns). If False
+                    (the default), evaluate the value in a wrapper
+                    context, which contains the local variables:
+                        * result: the return value of the target function
+                        * start/end/elapsed: float times
+                        * now: datetime.datetime.utcnow()
+                        * args/kwargs: inputs to the target function; these are
+                          also included in locals() by their argnames.
+                        * frame: sys._getframe() of the patch wrapper
+
+        * expires: a datetime, after which point the instrument will not fire,
+                   or None to mean no expiration
+        * custom: a dict of any additional data for subclasses. May include
+                  other information for filtering events, set points for
+                  closed-loop controllers, or other information specific
+                  to the kind of instrument.
+
+    Any additional keyword arguments passed to the constructor are accepted
+    and ignored.
+    """
+
+    # The value assigned to self.expires by expire_due_to_error().
+    # Set to a falsy value to disable expiring on error.
+    error_expiration = datetime.datetime(1970, 1, 1)
+
+    def __init__(self, name, value, internal, expires=None, custom=None, **kwargs):
+        self.name = name
+        self.value = value
+        self.internal = internal
+        self.expires = expires
+        # Never share a dict between instances: a falsy `custom` becomes
+        # a fresh empty dict.
+        self.custom = custom or {}
+
+    def __str__(self):
+        return "%s(name=%r, value=%r, internal=%r, expires=%r, custom=%r)" % (
+            self.__class__.__name__,
+            self.name,
+            self.value,
+            self.internal,
+            self.expires,
+            self.custom,
+        )
+
+    __repr__ = __str__
+
+    def evaluate(self, value, eval_context):
+        """Return the result of the given expression in the given context.
+
+        `value` is a string containing a Python expression.
+        `eval_context` is a (globals, locals) pair; it is unpacked into
+        the arguments of eval().
+
+        If `value` is exactly the name of a key in the locals mapping,
+        that entry is returned directly without calling eval().
+        """
+        # Skip eval() if a local variable name
+        v = eval_context[1].get(value, omitted)
+        if v is omitted:
+            # NOTE(security): eval() executes arbitrary code. The expression
+            # must only ever come from a trusted source.
+            v = eval(value, *eval_context)
+        return v
+
+    def merge_tags(self, tags, eval_context):
+        """Return `tags` extended with any tags computed from self.custom.
+
+        If self.custom has a truthy "tags" entry, it is evaluated as an
+        expression (see `evaluate`). A dict result is converted to a list
+        of "key:value" strings; a list result is used as is. The given
+        `tags` list is not modified; a new list is returned.
+
+        Raises TypeError if the expression yields anything other than
+        a dict or a list.
+        """
+        eval_tags = self.custom.get("tags", None)
+        if eval_tags:
+            t = self.evaluate(eval_tags, eval_context)
+            if isinstance(t, dict):
+                # dict.items() exists on both Python 2 and 3;
+                # dict.iteritems() was removed in Python 3.
+                t = ["%s:%s" % pair for pair in t.items()]
+            if not isinstance(t, list):
+                raise TypeError("Cannot send non-list of tags: %s" % (t,))
+            tags = tags + t
+        return tags
+
+    def __call__(self, tags, eval_context):
+        """Handle one probe event. Subclasses must override this."""
+        raise NotImplementedError()
+
+    def check_call(self, probe, *args, **kwargs):
+        """Return True if this instrument should be applied, False otherwise.
+
+        By default, this delegates the decision to the probe's manager
+        via probe.mgr.check_call(probe, self, *args, **kwargs). Override
+        this in a subclass to check the supplied function args/kwargs,
+        or other state, such as self.custom, environment variables,
+        or threadlocals.
+        """
+        return probe.mgr.check_call(probe, self, *args, **kwargs)
+
+    def expire_due_to_error(self):
+        """Set self.expires to self.error_expiration, if that is truthy."""
+        if self.error_expiration:
+            # Set self.expires to long ago, which keeps it from firing until:
+            # a) someone edits the probe, or
+            # b) processes restart, which could be new code that fixes things.
+            # Even if it doesn't, we only get ~1 error per process,
+            # not 1 per call to the target function.
+            self.expires = self.error_expiration
+
+class LogInstrument(Instrument):
+    """An instrument which writes one text line per event to `out`."""
+
+    # Upper bound on the length of the rendered value and of the rendered tags.
+    MAX_CHARS = 2000
+    # The stream written to by emit().
+    out = sys.stdout
+
+    def _truncate(self, text):
+        """Return text, shortened to MAX_CHARS with a trailing ellipsis."""
+        limit = self.MAX_CHARS
+        if len(text) <= limit:
+            return text
+        return text[: limit - 3] + "..."
+
+    def __call__(self, tags, eval_context):
+        """Evaluate self.value and emit it, unless the result is None."""
+        result = self.evaluate(self.value, eval_context)
+        if result is not None:
+            shown_value = self._truncate(str(result))
+            merged = self.merge_tags(tags, eval_context)
+            shown_tags = self._truncate(str(merged))
+            self.emit(self.name, shown_value, shown_tags)
+
+    def emit(self, name, value, tags):
+        """Write the formatted log line to self.out."""
+        line = "Probe (%s)[tags=%s] = %s\n" % (name, tags, value)
+        self.out.write(line)
+
+
+class StatsdInstrumentBase(Instrument):
+    """An instrument that sends a value to statsd.
+
+    Subclasses implement `emit` to choose the kind of metric to send.
+    """
+
+    # Upper bound on the length of a rejected value shown in error messages.
+    MAX_CHARS = 2000
+
+    # The types accepted as metric values. `long` only exists on Python 2;
+    # on Python 3 it is covered by `int`.
+    try:
+        NUMERIC_TYPES = (int, float, long)  # noqa: F821
+    except NameError:
+        NUMERIC_TYPES = (int, float)
+
+    def __call__(self, tags, eval_context):
+        """Evaluate self.value and emit it as a metric.
+
+        Nothing is emitted if the value is None.
+        Raises TypeError if the value is not numeric.
+        """
+        v = self.evaluate(self.value, eval_context)
+        if v is None:
+            return
+
+        if not isinstance(v, self.NUMERIC_TYPES):
+            v = str(v)
+            if len(v) > self.MAX_CHARS:
+                # Keep the shown value within MAX_CHARS, ellipsis included.
+                v = v[: self.MAX_CHARS - 3] + "..."
+            raise TypeError("Cannot send non-numeric metric: %s" % (v,))
+
+        self.emit(self.name, v, self.merge_tags(tags, eval_context))
+
+    def emit(self, name, value, tags):
+        """Send the metric. Subclasses must override this."""
+        raise NotImplementedError()
+
+
+class HistogramInstrument(StatsdInstrumentBase):
+    """A statsd instrument which reports its value via statsd.histogram."""
+
+    def emit(self, name, value, tags):
+        """Pass the name, value, and tags to statsd.histogram."""
+        # NOTE: the module-level `statsd` is None if its import failed.
+        statsd.histogram(name, value, tags=tags)
+
+
+class IncrementInstrument(StatsdInstrumentBase):
+    """A statsd instrument which reports its value via statsd.increment."""
+
+    def emit(self, name, value, tags):
+        """Pass the name, value, and tags to statsd.increment."""
+        # NOTE: the module-level `statsd` is None if its import failed.
+        statsd.increment(name, value, tags=tags)
+
+
+class ProbeTestInstrument(Instrument):
+    """An instrument which records each event in the self.results list."""
+
+    def __init__(self, *args, **kwargs):
+        """Initialize as a plain Instrument, with an empty results list."""
+        Instrument.__init__(self, *args, **kwargs)
+        self.results = []
+
+    def __call__(self, tags, eval_context):
+        """Append a (merged tags, evaluated value) pair to self.results."""
+        observed = self.evaluate(self.value, eval_context)
+        record = (self.merge_tags(tags, eval_context), observed)
+        self.results.append(record)