From 3735fa7e5e69122967fd4af7a2ae38035b6c39c5 Mon Sep 17 00:00:00 2001 From: Arne de Laat Date: Thu, 27 Jun 2024 23:34:08 +0200 Subject: [PATCH 1/6] Replace os.path by pathlib.Path --- writer/storage.py | 11 ++++------- writer/writer_app.py | 31 +++++++++++++++++-------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/writer/storage.py b/writer/storage.py index f2782fd..61f2002 100644 --- a/writer/storage.py +++ b/writer/storage.py @@ -1,7 +1,5 @@ """Storage docstrings""" -import os - import tables @@ -276,12 +274,11 @@ def open_or_create_file(data_dir, date): :param date: the event date """ - directory = os.path.join(data_dir, '%d/%d' % (date.year, date.month)) - file = os.path.join(directory, '%d_%d_%d.h5' % (date.year, date.month, date.day)) + directory = data_dir / f'{date.year}/{date.month}' + file = directory / f'{date.year}_{date.month}_{date.day}.h5' - if not os.path.exists(directory): - # create dir and parent dirs with mode rwxr-xr-x - os.makedirs(directory, 0o755) + # Ensure dir and parent directories exist with mode rwxr-xr-x + directory.mkdir(mode=0o755, parents=True, exists_ok=True) return tables.open_file(file, 'a') diff --git a/writer/writer_app.py b/writer/writer_app.py index 57841c7..130ef9b 100644 --- a/writer/writer_app.py +++ b/writer/writer_app.py @@ -8,11 +8,11 @@ import configparser import logging import logging.handlers -import os import pickle -import shutil import time +from pathlib import Path + from writer.store_events import store_event_list LEVELS = { @@ -53,31 +53,34 @@ def writer(configfile): level = LEVELS.get(config.get('General', 'loglevel'), logging.NOTSET) logger.setLevel(level=level) - queue = os.path.join(config.get('General', 'data_dir'), 'incoming') - partial_queue = os.path.join(config.get('General', 'data_dir'), 'partial') + data_dir = Path(config.get('General', 'data_dir')) + queue = data_dir / 'incoming' + partial_queue = data_dir / 'partial' + + sleep_duration = config.getint('Writer', 'sleep') # writer process try: while True: - entries = os.listdir(queue) + entries = queue.iterdir() if not entries: - time.sleep(config.getint('Writer', 'sleep')) + time.sleep(sleep_duration) for entry in entries: - path = os.path.join(queue, entry) - shutil.move(path, partial_queue) + partial_path = partial_queue / entry.name + entry.rename(partial_path) + + process_data(partial_path, data_dir) + partial_path.unlink() - path = os.path.join(partial_queue, entry) - process_data(path) - os.remove(path) except Exception: logger.exception('Exception occured, quitting.') -def process_data(file): +def process_data(file, data_dir): """Read data from a pickled object and store store in raw datastore""" - with open(file, 'rb') as handle: + with file.open('rb') as handle: try: data = pickle.load(handle) except UnicodeDecodeError: @@ -85,7 +88,7 @@ def process_data(file): data = decode_object(pickle.load(handle, encoding='bytes')) logger.debug(f"Processing data for station {data['station_id']}") - store_event_list(config.get('General', 'data_dir'), data['station_id'], data['cluster'], data['event_list']) + store_event_list(data_dir, data['station_id'], data['cluster'], data['event_list']) def decode_object(o): From 90e77f4a3c1e77adeafde2bf0b3878013e7fc278 Mon Sep 17 00:00:00 2001 From: Arne de Laat Date: Thu, 27 Jun 2024 23:40:18 +0200 Subject: [PATCH 2/6] More usage of pathlib --- tests/test_wsgi_app.py | 6 +++--- writer/writer_app.py | 2 +- wsgi/wsgi_app.py | 13 +++++++++---- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/test_wsgi_app.py b/tests/test_wsgi_app.py index 23380c3..e61d821 100644 --- a/tests/test_wsgi_app.py +++ b/tests/test_wsgi_app.py @@ -148,9 +148,9 @@ def assert_num_files_in_datastore(self, incoming=0, suspicious=0): self.assertEqual(len(self.files_in_folder(DATASTORE_PATH / 'suspicious')), suspicious) def assert_num_events_written(self, number_of_events): - fn = self.files_in_folder(DATASTORE_PATH / 'incoming')[0] - with open(fn, 'rb') as f: - data = pickle.load(f) + file_path = self.files_in_folder(DATASTORE_PATH / 'incoming')[0] + with file_path.open('rb') as file_handle: + data = pickle.load(file_handle) written_event_list = data['event_list'] self.assertEqual(len(written_event_list), number_of_events) diff --git a/writer/writer_app.py b/writer/writer_app.py index 130ef9b..e4684ff 100644 --- a/writer/writer_app.py +++ b/writer/writer_app.py @@ -54,7 +54,7 @@ def writer(configfile): logger.setLevel(level=level) data_dir = Path(config.get('General', 'data_dir')) - queue = data_dir / 'incoming' + queue = data_dir / 'incoming' partial_queue = data_dir / 'partial' sleep_duration = config.getint('Writer', 'sleep') diff --git a/wsgi/wsgi_app.py b/wsgi/wsgi_app.py index 6bda202..199cb08 100644 --- a/wsgi/wsgi_app.py +++ b/wsgi/wsgi_app.py @@ -9,6 +9,8 @@ import tempfile import urllib.parse +from pathlib import Path + from . import rcodes LEVELS = { @@ -133,7 +135,8 @@ def do_init(configfile): station_list except NameError: station_list = {} - with open(config.get('General', 'station_list')) as file: + station_list_path = Path(config.get('General', 'station_list')) + with station_list_path.open() as file: reader = csv.reader(file) for station in reader: if station: @@ -147,12 +150,14 @@ def store_data(station_id, cluster, event_list): logger.debug(f'Storing data for station {station_id}') - directory = os.path.join(config.get('General', 'data_dir'), 'incoming') - tmp_dir = os.path.join(config.get('General', 'data_dir'), 'tmp') + data_dir = Path(config.get('General', 'data_dir')) + + directory = data_dir / 'incoming' + tmp_dir = data_dir / 'tmp' if is_data_suspicious(event_list): logger.debug('Event list marked as suspicious.') - directory = os.path.join(config.get('General', 'data_dir'), 'suspicious') + directory = data_dir / 'suspicious' file = tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) logger.debug(f'Filename: {file.name}') From bcdde11f2c6da32f5074726bcf1aeb0af8074b0d Mon Sep 17 00:00:00 2001 From: Arne de Laat Date: Thu, 27 Jun 2024 23:40:34 +0200 Subject: [PATCH 3/6] Enable ruff PTH rules --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 32e69ca..e57145e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,6 @@ ignore = [ 'PERF203', # Allow try-except in loop 'PLR0912', # Allow functions with many branches 'PT', # Not using pytest - 'PTH', # Still using os.path 'RET', # Allow elif/else after return 'S101', # Assert is used to prevent incorrect 'S105', # Some hardcoded test passwords From 420b5dbecfd89f91429e55b1fe313225bf5f1ab0 Mon Sep 17 00:00:00 2001 From: Arne de Laat Date: Fri, 28 Jun 2024 09:27:22 +0200 Subject: [PATCH 4/6] Fix typo in keyword and update unittests --- tests/test_writer_acceptance.py | 8 ++++---- writer/storage.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_writer_acceptance.py b/tests/test_writer_acceptance.py index b260966..4a9b593 100644 --- a/tests/test_writer_acceptance.py +++ b/tests/test_writer_acceptance.py @@ -66,7 +66,7 @@ def tearDown(self): shutil.rmtree(DATASTORE_PATH / '2017') def test_event_acceptance(self): - self.writer_app.process_data(self.pickle_filename['CIC']) + self.writer_app.process_data(self.pickle_filename['CIC'], DATASTORE_PATH) data = self.read_table('events') self.assertEqual(data['timestamp'], 1488093964) @@ -86,7 +86,7 @@ def test_event_acceptance(self): self.assertEqual(tr1, base64.decodebytes(tr1_b64)) def test_singles_acceptance(self): - self.writer_app.process_data(self.pickle_filename['SIN']) + self.writer_app.process_data(self.pickle_filename['SIN'], DATASTORE_PATH) data = self.read_table('singles') self.assertEqual(data['timestamp'], 1488094031) @@ -99,7 +99,7 @@ def test_singles_acceptance(self): self.assertEqual(len(blobs), 0) def test_weather_acceptance(self): - self.writer_app.process_data(self.pickle_filename['WTR']) + self.writer_app.process_data(self.pickle_filename['WTR'], DATASTORE_PATH) data = self.read_table('weather') self.assertEqual(data['timestamp'], 1488094084) @@ -111,7 +111,7 @@ def test_weather_acceptance(self): self.assertEqual(len(blobs), 0) def test_config_acceptance(self): - self.writer_app.process_data(self.pickle_filename['CFG']) + self.writer_app.process_data(self.pickle_filename['CFG'], DATASTORE_PATH) data = self.read_table('config') self.assertEqual(data['timestamp'], 1488125225) self.assertEqual(data['mas_ch1_thres_high'], 320) diff --git a/writer/storage.py b/writer/storage.py index 61f2002..7e2061d 100644 --- a/writer/storage.py +++ b/writer/storage.py @@ -278,7 +278,7 @@ def open_or_create_file(data_dir, date): file = directory / f'{date.year}_{date.month}_{date.day}.h5' # Ensure dir and parent directories exist with mode rwxr-xr-x - directory.mkdir(mode=0o755, parents=True, exists_ok=True) + directory.mkdir(mode=0o755, parents=True, exist_ok=True) return tables.open_file(file, 'a') From f8bfec227fbaf05f5ec46c98fce23936af42689f Mon Sep 17 00:00:00 2001 From: Arne de Laat Date: Sat, 29 Jun 2024 13:25:54 +0200 Subject: [PATCH 5/6] Remove unnecessary lines to call test module Call using `python -m unittest [...]` instead. --- tests/test_writer_acceptance.py | 4 ---- tests/test_wsgi_app.py | 4 ---- 2 files changed, 8 deletions(-) diff --git a/tests/test_writer_acceptance.py b/tests/test_writer_acceptance.py index 4a9b593..a168320 100644 --- a/tests/test_writer_acceptance.py +++ b/tests/test_writer_acceptance.py @@ -139,7 +139,3 @@ class TestWriterAcceptancePy3Pickles(TestWriterAcceptancePy2Pickles): """Acceptance tests for python 3 pickles""" pickle_version = 'py3' - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_wsgi_app.py b/tests/test_wsgi_app.py index e61d821..1a9cb89 100644 --- a/tests/test_wsgi_app.py +++ b/tests/test_wsgi_app.py @@ -153,7 +153,3 @@ def assert_num_events_written(self, number_of_events): data = pickle.load(file_handle) written_event_list = data['event_list'] self.assertEqual(len(written_event_list), number_of_events) - - -if __name__ == '__main__': - unittest.main() From 484166e8c6c46637223fa286b970a2dcd74fce8f Mon Sep 17 00:00:00 2001 From: Arne de Laat Date: Sat, 29 Jun 2024 13:26:49 +0200 Subject: [PATCH 6/6] Add coverage report configuration --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e57145e..3076a64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,3 +56,8 @@ source = [ 'writer', 'wsgi', ] + +[tool.coverage.report] +show_missing = true +skip_empty = true +skip_covered = true