Skip to content

Commit

Permalink
Merge branch 'feature/analytics' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
felliott committed Aug 23, 2016
2 parents 85bf142 + 1be036f commit 1b0dfc9
Show file tree
Hide file tree
Showing 19 changed files with 550 additions and 25 deletions.
29 changes: 29 additions & 0 deletions mfr/core/extension.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
import abc

from mfr.core.metrics import MetricsRecord


class BaseExporter(metaclass=abc.ABCMeta):

def __init__(self, source_file_path, output_file_path, format):
    """Store the export parameters and start the metrics records for this exporter.

    :param source_file_path: stored on the instance; recorded in metrics as ``str()``
    :param output_file_path: stored on the instance; recorded in metrics as ``str()``
    :param format: stored on the instance and recorded in metrics unchanged
    """
    self.source_file_path = source_file_path
    self.output_file_path = output_file_path
    self.format = format

    # Top-level record for the 'exporter' category.  The per-module subrecord
    # (``self.metrics``) is only created when the module name is truthy.
    self.exporter_metrics = MetricsRecord('exporter')
    if self._get_module_name():
        self.metrics = self.exporter_metrics.new_subrecord(self._get_module_name())

    summary = {
        'class': self._get_module_name(),
        'format': self.format,
        'source_path': str(self.source_file_path),
        'output_path': str(self.output_file_path),
        # 'error': 'error_t',
        # 'elapsed': 'elapsed_t',
    }
    self.exporter_metrics.merge(summary)

@abc.abstractmethod
def export(self):
Expand All @@ -26,6 +40,21 @@ def __init__(self, metadata, file_path, url, assets_url, export_url):
self.url = url
self.assets_url = '{}/{}'.format(assets_url, self._get_module_name())
self.export_url = export_url
self.renderer_metrics = MetricsRecord('renderer')
if self._get_module_name():
self.metrics = self.renderer_metrics.new_subrecord(self._get_module_name())

self.renderer_metrics.merge({
'class': self._get_module_name(),
'ext': self.metadata.ext,
'url': self.url,
'export_url': self.export_url,
'file_path': self.file_path,
'file_required': self.file_required,
'cache_result': self.cache_result,
# 'error': 'error_t',
# 'elapsed': 'elpased_t',
})

@abc.abstractmethod
def render(self):
Expand Down
157 changes: 157 additions & 0 deletions mfr/core/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import copy


def _merge_dicts(a, b, path=None):
""""merges b into a
Taken from: http://stackoverflow.com/a/7205107
"""
if path is None:
path = []
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
_merge_dicts(a[key], b[key], path + [str(key)])
elif a[key] == b[key]:
pass # same leaf value
else:
raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
else:
a[key] = b[key]

return a


class MetricsBase:
    """Lightweight wrapper around a dict to make keeping track of metrics a little easier.

    Current functionality is limited, but may be extended later.  To do:

    * update/override method to indicate expectations of existing key::

        self.metrics.add_default('some.flag', True)
        <later>
        self.metrics.override('some.flag', False)  # dies if 'some.flag' doesn't already exist

    * optional type validation?::

        self.metrics.add('some.flag', True, bool())
        -or-
        self.metrics.define('some.flag', bool())
        <later>
        self.metrics.add('some.flag', 'foobar')  # dies, 'foobar' isn't a bool
    """

    def __init__(self):
        # The backing store; nested dicts are created on demand by ``add()``.
        self._metrics = {}

    @property
    def key(self):
        """ID string for this object.

        Declared as a property because ``manifesto()`` reads ``self.key`` as an attribute.
        As a plain method, ``manifesto()`` on a class that forgot to override ``key`` would
        silently use a bound method as the dict key instead of failing.

        :raises NotImplementedError: always; subclasses must override
        """
        raise NotImplementedError

    def add(self, key, value):
        """Store the given value under the given key.

        Subkeys can be specified by placing a dot between the parent and child keys.
        e.g. 'foo.bar' will be interpreted as ``self._metrics['foo']['bar']``.

        :param str key: the key to store ``value`` under
        :param value: the value to store, type unrestricted
        """
        self._set_dotted_key(self._metrics, key, value)

    def merge(self, record):
        """Merge a dict into the current metrics.

        Delegates to ``_merge_dicts``, so conflicting leaf values raise.

        :param dict record: a dict to merge with the current metrics
        """
        _merge_dicts(self._metrics, record)

    def serialize(self):
        """Return a deep copy of the metrics, so callers cannot mutate our state."""
        return copy.deepcopy(self._metrics)

    def manifesto(self):
        """'This is who I am and this is what I stand for!'

        :rtype: dict
        :returns: a dict with one entry: our key pointing to our serialized metrics
        """
        return {self.key: self.serialize()}

    def _set_dotted_key(self, store, key, value):
        """Naive method to set nested dict values via dot-separated keys.

        e.g. ``_set_dotted_key(self._metrics, 'foo.bar', 'moo')`` is equivalent to
        ``self._metrics['foo']['bar'] = 'moo'``.  This method is neither resilient nor
        intelligent and will react with bad grace if one of the intermediate keys already
        exists and is not a dict.

        :param dict store: the dict to write into; mutated in place
        :param str key: dot-separated path to the target entry
        :param value: the value to store at that path
        """
        *parents, leaf = key.split('.')
        current = store
        for part in parents:
            # Create the intermediate level only when it is missing.
            current = current.setdefault(part, {})
        current[leaf] = value


class MetricsRecord(MetricsBase):
    """A MetricsBase that also carries a category and a list of child records.

    Serializing a record includes the serialized metrics of every child, each stored
    under that child's ``key``.
    """

    def __init__(self, category):
        super().__init__()
        self.category = category
        # Children created through ``new_subrecord()``, in creation order.
        self.subrecords = []

    @property
    def key(self):
        """ID string for this record: '{category}'"""
        return self.category

    def serialize(self):
        """Return our own metrics plus one entry per subrecord, keyed by the subrecord's key.

        Subrecord entries are written after our own metrics, so a subrecord whose key
        equals one of our own top-level keys replaces that entry in the result.
        """
        combined = super().serialize()
        combined.update((child.key, child.serialize()) for child in self.subrecords)
        return combined

    def new_subrecord(self, name):
        """Create a MetricsSubRecord that shares our category, remember it, and return it.

        :param name: the name given to the new subrecord
        :returns: the newly created subrecord
        """
        child = MetricsSubRecord(self.category, name)
        self.subrecords.append(child)
        return child


class MetricsSubRecord(MetricsRecord):
    """A MetricsRecord that has a name as well as a category.

    Identifies itself as {category}_{name}.  It can create subrecords of its own; those
    take this subrecord's ``name`` as their category.
    """

    def __init__(self, category, name):
        super().__init__(category)
        self.name = name

    @property
    def key(self):
        """ID string for this subrecord: '{category}_{name}'"""
        category, name = self.category, self.name
        return '{}_{}'.format(category, name)

    def new_subrecord(self, name):
        """Create, remember and return a child subrecord.

        The child's category is this subrecord's ``name`` (not its category). ex::

            parent = MetricsRecord('foo')
            child = parent.new_subrecord('bar')
            grandchild = child.new_subrecord('baz')

            print(parent.key)      # foo
            print(child.key)       # foo_bar
            print(grandchild.key)  # bar_baz
        """
        child = MetricsSubRecord(self.name, name)
        self.subrecords.append(child)
        return child
21 changes: 21 additions & 0 deletions mfr/core/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from mfr.core import exceptions
from mfr.server import settings
from mfr.core.metrics import MetricsRecord


class BaseProvider(metaclass=abc.ABCMeta):
Expand All @@ -24,6 +25,17 @@ def __init__(self, request, url):
code=400
)
self.url = url
self.provider_metrics = MetricsRecord('provider')
self.metrics = self.provider_metrics.new_subrecord(self.NAME)

self.provider_metrics.merge({
'type': self.NAME,
'url': str(self.url),
})

@property
@abc.abstractmethod
def NAME(self):
    """Identifier of the concrete provider; every subclass must define it.

    Declared with ``@property`` stacked on ``@abc.abstractmethod`` because
    ``abc.abstractproperty`` has been deprecated since Python 3.3.

    :raises NotImplementedError: if the base implementation is ever reached
    """
    raise NotImplementedError

@abc.abstractmethod
def metadata(self):
Expand All @@ -42,3 +54,12 @@ def __init__(self, name, ext, content_type, unique_key, download_url):
self.content_type = content_type
self.unique_key = unique_key
self.download_url = download_url

def serialize(self):
    """Return this metadata as a plain dict.

    ``unique_key`` and ``download_url`` are converted with ``str()``; the other
    fields are passed through unchanged.
    """
    return dict([
        ('name', self.name),
        ('ext', self.ext),
        ('content_type', self.content_type),
        ('unique_key', str(self.unique_key)),
        ('download_url', str(self.download_url)),
    ])
137 changes: 137 additions & 0 deletions mfr/core/remote_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import copy
import json
import logging

import aiohttp
# from geoip import geolite2

import mfr
from mfr.server import settings

# import waterbutler
from waterbutler.core.utils import async_retry


logger = logging.getLogger(__name__)


@async_retry(retries=5, backoff=5)
async def log_analytics(request, metrics):
    """Send an event to Keen describing the action that occurred.

    Does nothing when ``settings.KEEN_PRIVATE_PROJECT_ID`` is None.

    :param dict request: serialized request data; its entries are merged into the top
        level of the payload.  The Keen addons configured below refer to the paths
        ``request.url``, ``tech.ip``, ``tech.ua`` and ``referrer.url``.  May be empty.
    :param dict metrics: the metrics to report; deep-copied, never mutated
    """
    if settings.KEEN_PRIVATE_PROJECT_ID is None:
        return

    keen_payload = copy.deepcopy(metrics)
    keen_payload['meta'] = {
        'mfr_version': mfr.__version__,
        # 'wb_version': waterbutler.__version__,
        'epoch': 1,
    }
    keen_payload.update(request)
    keen_payload['keen'] = {
        'addons': [
            {
                'name': 'keen:url_parser',
                'input': {
                    'url': 'request.url'
                },
                'output': 'request.info',
            },
            {  # private
                'name': 'keen:ip_to_geo',
                'input': {
                    'ip': 'tech.ip'
                },
                'output': 'geo',
            },
            {  # private
                'name': 'keen:ua_parser',
                'input': {
                    'ua_string': 'tech.ua',
                },
                'output': 'tech.info',
            },
        ],
    }

    # ``request`` can be the empty dict that ``_serialize_request(None)`` returns, so the
    # referrer is looked up defensively rather than with ``request['referrer']['url']``,
    # which would raise KeyError before anything was sent.
    referrer_url = (request.get('referrer') or {}).get('url')
    if referrer_url is not None:
        # NOTE(review): both referrer addons write to 'referrer.info' -- confirm that
        # this is intended on the Keen side.
        keen_payload['keen']['addons'].append({
            'name': 'keen:referrer_parser',
            'input': {
                'referrer_url': 'referrer.url',
                'page_url': 'request.url'
            },
            'output': 'referrer.info'
        })
        keen_payload['keen']['addons'].append({
            'name': 'keen:url_parser',
            'input': {
                'url': 'referrer.url'
            },
            'output': 'referrer.info',
        })

    collection = 'mfr_action'

    # send the private payload
    await _send_to_keen(keen_payload, collection, settings.KEEN_PRIVATE_PROJECT_ID,
                        settings.KEEN_PRIVATE_WRITE_KEY, 'private')


async def _send_to_keen(payload, collection, project_id, write_key, domain='private'):
    """Serialize ``payload`` as JSON and POST it to a Keen event collection.

    This function makes a single attempt and does not retry by itself; in this file the
    retrying is done by its caller, ``log_analytics``, which is wrapped in
    ``async_retry(retries=5, backoff=5)``.

    :param dict payload: the event to send; must be JSON-serializable
    :param str collection: name of the Keen collection to write to
    :param project_id: Keen project id, interpolated into the request URL
    :param write_key: sent as the ``Authorization`` header
    :param str domain: label used only in log and error messages
    :raises Exception: if Keen answers with any status other than 201
    """
    serialized = json.dumps(payload).encode('UTF-8')
    logger.debug('Serialized payload: %s', serialized)
    headers = {
        'Content-Type': 'application/json',
        'Authorization': write_key,
    }
    url = '{0}/{1}/projects/{2}/events/{3}'.format(settings.KEEN_API_BASE_URL,
                                                   settings.KEEN_API_VERSION,
                                                   project_id, collection)

    # The action name is only used for log/error text.  Look it up defensively so a
    # payload without 'handler' cannot raise KeyError after a successful POST, nor hide
    # the real Keen error on failure.
    action = (payload.get('handler') or {}).get('type')

    # NOTE(review): ``async with await aiohttp.request(...)`` is kept exactly as it was;
    # confirm it matches the aiohttp version this project pins.
    async with await aiohttp.request('POST', url, headers=headers, data=serialized) as resp:
        if resp.status != 201:
            error_body = await resp.read()
            raise Exception('Failed to log {} to {} collection in {} Keen. Status: {} Error: {}'.format(
                action, collection, domain, str(int(resp.status)), error_body
            ))

        logger.info('Successfully logged {} to {} collection in {} Keen'.format(
            action, collection, domain
        ))


def _serialize_request(request):
"""Serialize the original request."""
if request is None:
return {}

headers_dict = {}
for (k, v) in sorted(request.headers.get_all()):
if k not in ('Authorization', 'Cookie', 'User-Agent',):
headers_dict[k] = v

serialized = {
'tech': {
'ip': request.remote_ip,
'ua': request.headers['User-Agent'],
},
'request': {
'method': request.method,
'url': request.full_url(),
'time': request.request_time(),
'headers': headers_dict,
},
'referrer': {
'url': None,
},
}

if 'Referer' in request.headers:
referrer = request.headers['Referer']
serialized['referrer']['url'] = referrer

return serialized

0 comments on commit 1b0dfc9

Please sign in to comment.