Cleanup and ignore old events #48

Merged · 5 commits · Jun 25, 2024
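From the diff: the test fixtures move under tests/ (with tests/ made a proper package), the test suite is ported from os.path to pathlib, and a MINIMUM_YEAR cutoff (2020) is introduced so that events dated before it are logged and discarded; the acceptance tests patch the cutoff down to 2016 so the existing 2017 fixtures keep passing.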
8 changes: 0 additions & 8 deletions .gitignore
@@ -1,13 +1,8 @@
# Specific
.remote-sync.json
station_list.csv
config.ini
writer_app.py
*.h5
_build/
build/


# Python
*.pyc
*.pyo
@@ -23,6 +18,3 @@ build/
ehthumbs.db
Icon?
Thumbs.db

# logfiles generated by tests
tests/hisparc.log*
2 changes: 1 addition & 1 deletion Makefile
@@ -3,7 +3,7 @@ test: unittests linttest doctest

.PHONY: unittests
unittests:
coverage run -m unittest discover tests -v
coverage run -m unittest -v
coverage report

.PHONY: linttest
Empty file added tests/__init__.py
Empty file.
Empty file added tests/fake_datastore/logs/.keep
Empty file.
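The empty tests/__init__.py makes tests/ an importable package, which is what lets the Makefile drop the explicit discover arguments: with no arguments, python -m unittest falls back to test discovery from the current directory. The .keep file holds the otherwise-empty logs directory in git; files_in_folder in the tests below skips it. A quick check, assuming the repository root as working directory:

# With tests/ a package, these two invocations are equivalent:
coverage run -m unittest -v
coverage run -m unittest discover -v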
6 changes: 3 additions & 3 deletions tests/test_data/config.ini
@@ -1,8 +1,8 @@
[General]
log=hisparc.log
log=tests/fake_datastore/logs/hisparc.log
loglevel=debug
station_list=fake_datastore/station_list.csv
data_dir=fake_datastore
station_list=tests/fake_datastore/station_list.csv
data_dir=tests/fake_datastore/

[Writer]
sleep=5
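The fixture paths in config.ini are now given relative to the repository root rather than the tests directory, presumably so the suite runs correctly from the root, which is where the Makefile invokes it.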
65 changes: 24 additions & 41 deletions tests/test_writer_acceptance.py
@@ -1,64 +1,52 @@
"""
Acceptance tests for the writer
"""Acceptance tests for the writer

Check with data pickled by Python 2 and 3.

python 3
"""

import base64
import configparser
import os
import shutil
import sys
import unittest

from pathlib import Path
from unittest import mock

import tables

from numpy import array
from numpy.testing import assert_array_equal

self_path = os.path.dirname(__file__)
test_data_path = os.path.join(self_path, 'test_data/')

# configuration:
WRITER_PATH = os.path.join(self_path, '../')
DATASTORE_PATH = os.path.join(self_path, 'fake_datastore')
CONFIGFILE = os.path.join(test_data_path, 'config.ini')
from writer import writer_app

CONFIG = f"""
[General]
log=hisparc.log
loglevel=debug
station_list={DATASTORE_PATH}/station_list.csv
data_dir={DATASTORE_PATH}
"""

with open(CONFIGFILE, 'w') as f:
f.write(CONFIG)
self_path = Path(__file__).parent
test_data_path = self_path / 'test_data'

# Configuration
DATASTORE_PATH = self_path / 'fake_datastore'
CONFIGFILE = test_data_path / 'config.ini'
STATION_ID = 99
CLUSTER = 'amsterdam'

UPLOAD_CODES = ['CIC', 'SIN', 'WTR', 'CFG']
pickle_data_path = os.path.join(test_data_path, 'incoming_writer/')
PICKLE_DATA_PATH = test_data_path / 'incoming_writer'


def import_writer_app():
"""import the writer"""
sys.path.append(WRITER_PATH)
from writer import writer_app

def configure_writer_app():
"""configure the writer"""
writer_app.config = configparser.ConfigParser()
writer_app.config.read(CONFIGFILE)
return writer_app


def get_writer_app(writer_app=None):
"""return the WSGI application"""
if writer_app is None:
writer_app = import_writer_app()
if not hasattr(writer_app, 'config'):
writer_app = configure_writer_app()
return writer_app


@mock.patch('writer.store_events.MINIMUM_YEAR', 2016)
class TestWriterAcceptancePy2Pickles(unittest.TestCase):
"""Acceptance tests for python 2 pickles"""

@@ -69,15 +57,13 @@ def setUp(self):
self.station_id = STATION_ID
self.cluster = CLUSTER
self.filepath = '2017/2/2017_2_26.h5'
self.pickle_filename = {}
for upload_code in UPLOAD_CODES:
self.pickle_filename[upload_code] = os.path.join(
pickle_data_path,
f'writer_{self.pickle_version}_{upload_code}',
)
self.pickle_filename = {
upload_code: PICKLE_DATA_PATH / f'writer_{self.pickle_version}_{upload_code}'
for upload_code in UPLOAD_CODES
}

def tearDown(self):
self.clean_datastore()
shutil.rmtree(DATASTORE_PATH / '2017')

def test_event_acceptance(self):
self.writer_app.process_data(self.pickle_filename['CIC'])
@@ -140,17 +126,14 @@ def test_config_acceptance(self):
self.assertEqual(blobs[1], b'Hardware: 0 FPGA: 0')

def read_table(self, table):
path = os.path.join(DATASTORE_PATH, self.filepath)
path = DATASTORE_PATH / self.filepath
table_path = f'/hisparc/cluster_{self.cluster}/station_{self.station_id}/{table}'
with tables.open_file(path, 'r') as datafile:
t = datafile.get_node(table_path)
data = t.read()

return data

def clean_datastore(self):
shutil.rmtree(os.path.join(DATASTORE_PATH, '2017'))


class TestWriterAcceptancePy3Pickles(TestWriterAcceptancePy2Pickles):
"""Acceptance tests for python 3 pickles"""
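The refactor here (and in test_wsgi_app.py below) systematically swaps os.path, glob, and open() plumbing for pathlib. A minimal sketch of the substitutions the diff makes, with hypothetical variable names:

from pathlib import Path

base = Path(__file__).parent                 # was: os.path.dirname(__file__)
config = base / 'test_data' / 'config.ini'   # was: os.path.join(test_data_path, 'config.ini')
payload = config.read_bytes()                # was: open(fn, 'rb') followed by f.read()
for entry in (base / 'fake_datastore' / 'incoming').iterdir():  # was: glob.glob(folder + '/*')
    if entry.is_file():
        entry.unlink()                       # was: os.remove(fn)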
71 changes: 30 additions & 41 deletions tests/test_wsgi_app.py
@@ -1,57 +1,47 @@
"""Acceptance tests for the datastore WSGI app"""

import functools
import glob
import hashlib
import os
import pickle
import sys
import unittest

from webtest import TestApp
from http import HTTPStatus
from pathlib import Path
from unittest import mock

self_path = os.path.dirname(__file__)
test_data_path = os.path.join(self_path, 'test_data/')
from webtest import TestApp

# configuration:
WSGI_APP_PATH = os.path.join(self_path, '../')
DATASTORE_PATH = os.path.join(self_path, 'fake_datastore')
CONFIGFILE = os.path.join(test_data_path, 'config.ini')
from wsgi import wsgi_app

CONFIG = f"""
[General]
log=hisparc.log
loglevel=debug
station_list={DATASTORE_PATH}/station_list.csv
data_dir={DATASTORE_PATH}
"""
self_path = Path(__file__).parent
test_data_path = self_path / 'test_data'

with open(CONFIGFILE, 'w') as f:
f.write(CONFIG)
# configuration:
DATASTORE_PATH = self_path / 'fake_datastore'
CONFIGFILE = test_data_path / 'config.ini'

STATION_ID = 99
PASSWORD = 'fake_station'

EVENTPY2 = os.path.join(test_data_path, 'incoming_http/py2_s510_100events')
EVENTPY3 = os.path.join(test_data_path, 'incoming_http/py3event')
EVENTSUS = os.path.join(test_data_path, 'incoming_http/suspicious_event')
EVENTPY2 = test_data_path / 'incoming_http/py2_s510_100events'
EVENTPY3 = test_data_path / 'incoming_http/py3event'
EVENTSUS = test_data_path / 'incoming_http/suspicious_event'


def import_wsgi_app():
def configure_wsgi_app():
"""import the WSGI application"""
sys.path.append(WSGI_APP_PATH)
from wsgi import wsgi_app

return functools.partial(wsgi_app.application, configfile=CONFIGFILE)


def get_wsgi_app(wsgi_app=None):
"""return the WSGI application"""
if wsgi_app is None:
wsgi_app = import_wsgi_app()
wsgi_app = configure_wsgi_app()
return wsgi_app


@mock.patch('wsgi.wsgi_app.MINIMUM_YEAR', 2016)
class TestWsgiAppAcceptance(unittest.TestCase):
def setUp(self):
self.station_id = STATION_ID
@@ -64,6 +54,7 @@ def tearDown(self):
def test_invalid_post_data(self):
resp = self.app.post('/', {})
self.assertEqual(resp.body, b'400') # invalid post data
self.assertEqual(resp.status_code, HTTPStatus.OK)
self.assert_num_files_in_datastore(incoming=0, suspicious=0)

def test_unpickling_error(self):
@@ -136,30 +127,28 @@ def upload(self, pickled_data, checksum=None):
}

response = self.app.post('/', data)
self.assertEqual(response.status_code, HTTPStatus.OK)
return response.body

def read_pickle(self, fn):
with open(fn, 'rb') as f:
pickle = f.read()
return pickle
event = fn.read_bytes()
return event

def files_in_folder(self, folder):
return glob.glob(folder + '/*')
def files_in_folder(self, path):
return [file for file in path.iterdir() if file.name != '.keep']

def clean_datastore(self):
for folder in ['incoming', 'tmp', 'suspicious']:
for fn in self.files_in_folder(os.path.join(DATASTORE_PATH, folder)):
os.remove(fn)
for folder in ['incoming', 'tmp', 'suspicious', 'logs']:
for filepath in self.files_in_folder(DATASTORE_PATH / folder):
filepath.unlink()

def assert_num_files_in_datastore(self, incoming=None, suspicious=None):
self.assertEqual(len(self.files_in_folder(os.path.join(DATASTORE_PATH, 'tmp'))), 0)
if incoming is not None:
self.assertEqual(len(self.files_in_folder(os.path.join(DATASTORE_PATH, 'incoming'))), incoming)
if suspicious is not None:
self.assertEqual(len(self.files_in_folder(os.path.join(DATASTORE_PATH, 'suspicious'))), suspicious)
def assert_num_files_in_datastore(self, incoming=0, suspicious=0):
self.assertEqual(len(self.files_in_folder(DATASTORE_PATH / 'tmp')), 0)
self.assertEqual(len(self.files_in_folder(DATASTORE_PATH / 'incoming')), incoming)
self.assertEqual(len(self.files_in_folder(DATASTORE_PATH / 'suspicious')), suspicious)

def assert_num_events_written(self, number_of_events):
fn = self.files_in_folder(os.path.join(DATASTORE_PATH, 'incoming'))[0]
fn = self.files_in_folder(DATASTORE_PATH / 'incoming')[0]
with open(fn, 'rb') as f:
data = pickle.load(f)
written_event_list = data['event_list']
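Both test modules pin the new cutoff with a class-level mock.patch so the 2017 fixtures keep passing regardless of the production value of MINIMUM_YEAR. A self-contained sketch of that pattern (hypothetical test class; the decorator target is the real one from this PR's writer tests):

import unittest

from unittest import mock

@mock.patch('writer.store_events.MINIMUM_YEAR', 2016)
class TestCutoffIsPatched(unittest.TestCase):
    """A class-level patch swaps the module constant for every test method."""

    def test_constant_is_patched(self):
        from writer import store_events
        self.assertEqual(store_events.MINIMUM_YEAR, 2016)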
7 changes: 6 additions & 1 deletion writer/store_events.py
@@ -7,6 +7,8 @@

logger = logging.getLogger('writer.store_events')

MINIMUM_YEAR = 2020


def store_event(datafile, cluster, station_id, event):
"""Stores an event in the h5 filesystem
@@ -38,7 +40,7 @@ def store_event(datafile, cluster, station_id, event):
nanoseconds = eventheader['nanoseconds']
# make an extended timestamp, which is the number of nanoseconds since
# epoch
ext_timestamp = timestamp * int(1e9) + nanoseconds
ext_timestamp = timestamp * 1_000_000_000 + nanoseconds
row['timestamp'] = timestamp

if upload_codes['_has_ext_time']:
@@ -118,6 +120,9 @@ def store_event_list(data_dir, station_id, cluster, event_list):
timestamp = event['header']['datetime']
if timestamp:
date = timestamp.date()
if date.year < MINIMUM_YEAR:
logger.error(f'Old event ({date}), discarding event (station: {station_id})')
continue
if date != prev_date:
if datafile:
datafile.close()
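The guard added to store_event_list logs and skips old events before any datafile is opened for them. A standalone sketch of the check, with a hypothetical helper name (the actual diff inlines this logic in store_event_list):

import logging

logger = logging.getLogger('writer.store_events')

MINIMUM_YEAR = 2020

def is_recent_enough(event, station_id):
    """Return False (and log an error) for events older than MINIMUM_YEAR."""
    timestamp = event['header']['datetime']  # a datetime.datetime, or falsy if absent
    if timestamp and timestamp.date().year < MINIMUM_YEAR:
        logger.error(f'Old event ({timestamp.date()}), discarding event (station: {station_id})')
        return False
    return True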