Skip to content
Permalink
Browse files

Major refactoring. State tracked in MongoDB, monolithic tracker split …

…into multiple workers.
  • Loading branch information...
MichaelVL committed Jul 2, 2016
1 parent 18420be commit 4f83a95b71cdb3af00d81d4bd0bab7326fe8f3aa
@@ -7,7 +7,7 @@
class Backend(object):
def __init__(self, config, subcfg):
self.debug = False
self.generation = None
self.generation = -1 # DB might start out with 'None'
self.config = config
self.subcfg = subcfg

@@ -9,18 +9,18 @@ def __init__(self, config, subcfg):
super(Backend, self).__init__(config, subcfg)
self.print_meta = getattr(subcfg, 'print_meta', False)

def print_state(self, state):
def print_state(self, db):
strfmt = '%Y:%m:%d %H:%M:%S'
time1 = state.first_timestamp.strftime(strfmt)
time2 = state.timestamp.strftime(strfmt)
print('Tracked period: {} - {} UTC. {} changesets.'.format(time1, time2, len(state.area_chgsets)))
self.print_chgsets(state, self.print_meta)
#time1 = state.first_timestamp.strftime(strfmt)
#time2 = state.timestamp.strftime(strfmt)
#print('Tracked period: {} - {} UTC. {} changesets.'.format(time1, time2, len(state.area_chgsets)))
self.print_chgsets(db, self.print_meta)

def print_chgsets(self, state, print_meta=False):
csets = state.area_chgsets
chginfo = state.area_chgsets_info
for chgid in csets[::-1]:
meta = chginfo[chgid]['meta']
def print_chgsets(self, db, print_meta=False):
for c in db.chgsets_ready():
cid = c['cid']
meta = db.chgset_get_meta(cid)
info = db.chgset_get_info(cid)
#print 'cset=', pprint.pprint(data)
if 'comment' in meta['tag'].keys():
comment = meta['tag']['comment']
@@ -32,7 +32,7 @@ def print_chgsets(self, state, print_meta=False):
source = ''
timestamp = oc.Changeset.get_timestamp(meta)[1]
htimestamp = HumanTime.date2human(timestamp)
print u' {0} \'{1}\' {2} (\'{3}\') \'{4}\' {5}'.format(chgid, meta['user'], htimestamp, timestamp, comment, source).encode('ascii','backslashreplace')
print u" {} \'{}\' {} ('{}') '{}' state={}".format(cid, meta['user'], htimestamp, timestamp, comment, info['state']).encode('ascii','backslashreplace')
if print_meta:
for k,v in meta.items():
print u' {0}:{1}'.format(k,v).encode('ascii','backslashreplace'),
@@ -13,16 +13,13 @@ def __init__(self, config, subcfg):
self.list_fname = config.getpath('path', 'BackendGeoJson')+'/'+subcfg['filename']
self.click_url = subcfg['click_url']
self.colours = col.ColourScheme()
self.print_chgsets(None)

self.generation = None
self.print_chgsets(None, None)

def print_state(self, state):
if self.generation != state.generation:
self.generation = state.generation
if len(state.area_chgsets) > 0:
self.print_chgsets(state.area_chgsets,
state.area_chgsets_info)
def print_state(self, db):
if self.generation != db.generation:
self.generation = db.generation
if db.chgsets.count() > 0:
self.print_chgsets(db)

def pprint(self, txt):
#print(txt.encode('utf8'), file=self.f)
@@ -68,13 +65,13 @@ def add_cset_bbox(self, geoj, meta):
}
geoj['features'].append(feature)

def print_chgsets(self, csets, info):
def print_chgsets(self, db):
geoj = { "type": "FeatureCollection",
"features": [] }
if csets and len(csets) > 0:
for chgid in csets[::-1]:
data = info[chgid]
self.add_cset_bbox(geoj, data['meta'])
if db:
for c in db.chgsets_ready():
cid = c['cid']
self.add_cset_bbox(geoj, db.chgset_get_meta(cid))

self.start_file(self.list_fname)
logger.debug('Data sent to json file={}'.format(geoj))
@@ -18,20 +18,32 @@ class Backend(Backend.Backend):
def __init__(self, config, subcfg):
super(Backend, self).__init__(config, subcfg)
self.list_fname = config.getpath('path', 'BackendHtml')+'/'+subcfg['filename']
#self.list_fname_old = config.getpath('path', 'BackendHtml')+'/'+config.get('filename_old', 'BackendHtml')
self.template_name = subcfg['template']

self.show_details = getattr(subcfg, 'show_details', True)
self.show_comments = getattr(subcfg, 'show_comments', True)
self.generation = None
self.last_chg_seen = None
self.last_update = datetime.datetime.now()

self.env = jinja2.Environment(loader=jinja2.FileSystemLoader(config.getpath('template_path', 'tracker')))
self.env.filters['js_datetime'] = self._js_datetime_filter
self.env.filters['utc_datetime'] = self._utc_datetime_filter

self.start_page(self.list_fname)
self.no_items()
self.end_page()

def print_state(self, state):
def _js_datetime_filter(self, value):
'''Jinja2 filter formatting timestamps in format understood by javascript'''
# See javascript date/time format: http://tools.ietf.org/html/rfc2822#page-14
JS_TIMESTAMP_FMT = '%a, %d %b %Y %H:%M:%S %z'
return value.strftime(JS_TIMESTAMP_FMT)

def _utc_datetime_filter(self, value):
TIMESTAMP_FMT = '%Y:%m:%d %H:%M:%S'
return value.strftime(TIMESTAMP_FMT)

def print_state(self, db):
now = datetime.datetime.now()
#if now.day != self.last_update.day:
# print('Cycler - new day: {} {}'.format(now.day, self.last_update.day))
@@ -41,18 +53,24 @@ def print_state(self, state):
# print('Cycler')
# state.clear_csets()
self.last_update = datetime.datetime.now()
if self.generation != state.generation:
self.generation = state.generation
if self.generation != db.generation:
self.generation = db.generation

self.start_page(self.list_fname)
template = state.env.get_template(self.template_name)
csets = state.area_chgsets
ctx = { 'state': state}
ctx['csets'] = csets[::-1]
ctx['csets_err'] = state.err_chgsets
ctx['csetinfo'] = state.area_chgsets_info
ctx['show_details'] = self.show_details
ctx['show_comments'] = self.show_comments
template = self.env.get_template(self.template_name)
ctx = { #'state': state,
'csets': [],
'csets_err': [],
'csetmeta': {},
'csetinfo': {},
'show_details': self.show_details,
'show_comments': self.show_comments }
for c in db.chgsets_ready():
cid = c['cid']
ctx['csets'].append(c)
#ctx['csets_err'] = state.err_chgsets
ctx['csetmeta'][cid] = db.chgset_get_meta(cid)
ctx['csetinfo'][cid] = db.chgset_get_info(cid)
logger.debug('Data passed to template: {}'.format(ctx))
self.pprint(template.render(ctx))
self.end_page()
@@ -9,6 +9,7 @@
import ColourScheme as col
import operator
import logging
import jinja2

logger = logging.getLogger(__name__)

@@ -18,63 +19,81 @@ def __init__(self, config, subcfg):
super(Backend, self).__init__(config, subcfg)
self.list_fname = config.getpath('path', 'BackendHtmlSummary')+'/'+subcfg['filename']
self.template_name = subcfg['template']
self.generation = None
self.print_state()
self.env = jinja2.Environment(loader=jinja2.FileSystemLoader(config.getpath('template_path', 'tracker')))
self.env.filters['js_datetime'] = self._js_datetime_filter
self.env.filters['utc_datetime'] = self._utc_datetime_filter
self.print_state(None)

def print_state(self, state=None):
def print_state(self, db):
force = True # Because otherwise 'summary_created' timestamp below is not updated
if not state or self.generation != state.generation or force:
if not db or self.generation != db.generation or force:
self.start_page(self.list_fname)
if not state:
if not db or not db.pointer:
self.pprint('Nothing here yet')
else:
template = state.env.get_template(self.template_name)
self.generation = state.generation
csets = state.area_chgsets
info = state.area_chgsets_info
template = self.env.get_template(self.template_name)
self.generation = db.generation
now = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
dbptr = db.pointer
data = {
'state': state,
'track_starttime': state.first_timestamp,
'track_endtime': state.timestamp,
'tracked_hours': (state.timestamp-state.first_timestamp).total_seconds()/3600,
'csets': [],
'track_starttime': dbptr['first_pointer']['timestamp'],
'track_endtime': dbptr['timestamp'],
'tracked_hours': (dbptr['timestamp']-dbptr['first_pointer']['timestamp']).total_seconds()/3600,
'summary_created': now,
'pointer_timestamp': state.pointer.timestamp()
'pointer_timestamp': dbptr['timestamp'],
'first_seqno': dbptr['first_pointer']['seqno'],
'latest_seqno': dbptr['seqno']-1,
'generation': self.generation
}
cset_tracked_hours = 0
if state.area_chgsets:
first_cset = state.area_chgsets[0]
first_cset_ts = oc.Changeset.get_timestamp(state.area_chgsets_info[first_cset]['meta'])[1]
cset_tracked_hours = (state.timestamp-first_cset_ts).total_seconds()/3600
data['cset_first'] = first_cset_ts
#if state.area_chgsets: # FIXME
# first_cset = state.area_chgsets[0]
# first_cset_ts = oc.Changeset.get_timestamp(state.area_chgsets_info[first_cset]['meta'])[1]
# cset_tracked_hours = (state.timestamp-first_cset_ts).total_seconds()/3600
# data['cset_first'] = first_cset_ts

users = {}
notes = 0
for chgid in csets:
meta = info[chgid]['meta']
csets_w_notes = 0
csets_w_addr_changes = 0
for c in db.chgsets_ready():
cid = c['cid']
meta = db.chgset_get_meta(cid)
info = db.chgset_get_info(cid)
user = meta['user']
users[user] = users.get(user,0) + 1
notes += int(meta['comments_count'])
notecnt = int(meta['comments_count'])
if notecnt > 0:
notes += int(meta['comments_count'])
csets_w_notes += 1
if int(info['misc']['dk_address_node_changes'])>0:
csets_w_addr_changes += 1
data['csets_with_notes'] = csets_w_notes
data['csets_with_addr_changes'] = csets_w_addr_changes

# Summarize edits and mileage - we don't do this incrementally
# since csets can be split over multiple diffs
edits = {'node': {'create':0, 'modify':0, 'delete':0},
'way': {'create':0, 'modify':0, 'delete':0},
'relation': {'create':0, 'modify':0, 'delete':0}}
mileage = {}
for chgid in csets[::-1]:
if 'truncated' in info[chgid]['state']:
for c in db.chgsets_ready():
cid = c['cid']
info = db.chgset_get_info(cid)
data['csets'].append(c)
if 'truncated' in info['state']:
continue
summary = info[chgid]['summary']
summary = info['summary']
for action in ['create', 'modify', 'delete']:
if summary['_'+action] > 0:
for type in ['node', 'way', 'relation']:
edits[type][action] += summary[action][type]
self.merge_int_dict(mileage, info[chgid]['mileage_m'])
self.merge_int_dict(mileage, info['mileage_m'])
data['edits'] = edits
data['users'] = users
data['notes'] = notes
data['csets'] = csets
logger.debug('Accumulated mileage: {}'.format(mileage))

mileage_bytype = []
if mileage:
@@ -90,13 +109,14 @@ def print_state(self, state=None):
sum += m
num_items += 1
data['mileage_bytype'] = mileage_bytype
if cset_tracked_hours>0:
data['mileage_meter'] = self._i2s(sum)
data['mileage_meter_per_hour'] = self._i2s(int(sum/cset_tracked_hours))
data['mileage_meter'] = self._i2s(sum)
# Clamping with max() means that if cset_tracked_hours is below 1 (e.g. 0), we
# pretend the current metrics are for one hour
data['mileage_meter_per_hour'] = self._i2s(int(sum/max(1,cset_tracked_hours)))

if hasattr(state, 'pointer'):
lag = now-state.pointer.timestamp()
data['lag_seconds'] = int(lag.seconds)
#if hasattr(state, 'pointer'): # FIXME
# lag = now-state.pointer.timestamp()
# data['lag_seconds'] = int(lag.seconds)

logger.debug('Data passed to template: {}'.format(data))
self.pprint(template.render(data))
@@ -1,38 +1,45 @@
FROM debian:jessie
#FROM resin/rpi-raspbian

RUN apt-get -y update && apt-get install -y supervisor git python python-pip python-shapely python-tz
RUN apt-get install -y nginx
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
MAINTAINER osmtracker@network42.dk

ENV DEBIAN_FRONTEND noninteractive

RUN apt-get -y update && apt-get install -y supervisor git python python-pip python-shapely python-tz python-dev mongodb nginx

RUN mkdir -p /html/dynamic
RUN mkdir -p /data/db

ADD html /html
RUN mkdir /html/jquery-2.1.3
ADD https://code.jquery.com/jquery-2.1.3.min.js /html/jquery-2.1.3/jquery.min.js

RUN mkdir -p /html/leaflet-0.7.7
ADD http://cdn.leafletjs.com/leaflet/v0.7.7/leaflet.css /html/leaflet-0.7.7/leaflet.css
ADD http://cdn.leafletjs.com/leaflet/v0.7.7/leaflet.js /html/leaflet-0.7.7/leaflet.js

RUN chown -R www-data:www-data /html
RUN mkdir -p /html/dynamic && chown -R www-data:www-data /html

COPY config/nginx.conf /etc/nginx/nginx.conf
COPY config/nginx-osmtracker.conf /etc/nginx/sites-enabled/default
COPY docker/config/nginx.conf /etc/nginx/nginx.conf
COPY docker/config/nginx-osmtracker.conf /etc/nginx/sites-enabled/default

RUN mkdir /osmtracker
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

WORKDIR /osmtracker/
RUN git clone https://github.com/MichaelVL/osm-analytic-tracker.git
RUN mkdir /osmtracker
ADD requirements.txt /osmtracker/
RUN pip install -r /osmtracker/requirements.txt

WORKDIR /osmtracker/osm-analytic-tracker
COPY config/osmtracker.json config.json
RUN pip install -r /osmtracker/osm-analytic-tracker/requirements.txt
# Override requirements.txt for osmapi
RUN git clone https://github.com/MichaelVL/osmapi.git
#ADD denmark.poly region.poly
ADD http://download.geofabrik.de/europe/denmark.poly region.poly

RUN cp -r html/* /html/
ADD *.py config.json logging.conf worker.sh ./
RUN mkdir /osmtracker/templates
ADD templates templates/
RUN sed -i 's/"path": "html"/"path": "\/html\/dynamic"/' ./config.json

EXPOSE 80

ADD config/supervisord.conf /
ADD supervisord.conf /
CMD ["/usr/bin/supervisord", "-c", "/supervisord.conf"]
Oops, something went wrong.

0 comments on commit 4f83a95

Please sign in to comment.
You can’t perform that action at this time.