From b8ed68935b5dac75e98a7fc732e188f3254ca70b Mon Sep 17 00:00:00 2001 From: keeagnsmith21 Date: Mon, 27 May 2024 15:16:31 +0800 Subject: [PATCH 1/5] Added function to normalise date strings --- observatory_platform/date_utils.py | 34 +++++++++++++ observatory_platform/tests/test_date_utils.py | 49 +++++++++++++++++++ pyproject.toml | 1 + 3 files changed, 84 insertions(+) create mode 100644 observatory_platform/date_utils.py create mode 100644 observatory_platform/tests/test_date_utils.py diff --git a/observatory_platform/date_utils.py b/observatory_platform/date_utils.py new file mode 100644 index 000000000..43dd9a8cc --- /dev/null +++ b/observatory_platform/date_utils.py @@ -0,0 +1,34 @@ +# Copyright 2024 Curtin University +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Author: Keegan Smith + +from zoneinfo import ZoneInfo + +from dateutil import parser + + +def normalise_datetime(dt_string: str) -> str: + """ + Converts a date or datetime string to an isoformatted datetime string at +0000UTC + + :param dt_string: The string to convert + :return: The ISO formatted datetime string + """ + dt = parser.parse(dt_string) # Parse string to datetime object + if not dt.utcoffset(): # If no timezone present, assume +0000UTC + dt = dt.replace(tzinfo=ZoneInfo("UTC")) + dt = dt.astimezone(ZoneInfo("UTC")) + + return dt.isoformat() diff --git a/observatory_platform/tests/test_date_utils.py b/observatory_platform/tests/test_date_utils.py new file mode 100644 index 000000000..916eda479 --- /dev/null +++ b/observatory_platform/tests/test_date_utils.py @@ -0,0 +1,49 @@ +# Copyright 2024 Curtin University +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Author: Keegan Smith + +import unittest + +from observatory_platform.date_utils import normalise_datetime + + +class test_normalise_datetime(unittest.TestCase): + """Tests for normalise_datetime""" + + def test_good_inputs(self): + inputs = [ + "2024-01-01 12:00:00+0000", + "2024-01-01 00:00:00+0800", + "2024-01-01 12:00:00-0800", + "2024-01-01 12:00:00Z", + "2024-01-01 12:00:00UTC+1", + ] + expected_outputs = [ + "2024-01-01T12:00:00+00:00", + "2023-12-31T16:00:00+00:00", + "2024-01-01T20:00:00+00:00", + "2024-01-01T12:00:00+00:00", + "2024-01-01T13:00:00+00:00", + ] + for input, expected_output in zip(inputs, expected_outputs): + actual_output = normalise_datetime(input) + self.assertEqual(expected_output, actual_output) + + def test_missing_tz(self): + inputs = ["2024-01-01 00:00:00", "2024-01-01T12:00:00"] + expected_outputs = ["2024-01-01T00:00:00+00:00", "2024-01-01T12:00:00+00:00"] + for input, expected_output in zip(inputs, expected_outputs): + actual_output = normalise_datetime(input) + self.assertEqual(expected_output, actual_output) diff --git a/pyproject.toml b/pyproject.toml index 1d108b354..3d6c2b99d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies = [ "validators<=0.20.0", "xmltodict", "tenacity", + "dateutils", ] [project.optional-dependencies] From 745b4e729e5eb0d7f65cc68e3863d2860123fd1b Mon Sep 17 00:00:00 2001 From: keeagnsmith21 Date: Tue, 28 May 2024 13:08:48 +0800 Subject: [PATCH 2/5] fix dateutil dependency --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3d6c2b99d..ee6ac830f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ dependencies = [ "validators<=0.20.0", "xmltodict", "tenacity", - "dateutils", + "dateutil", ] [project.optional-dependencies] @@ -144,8 +144,8 @@ optional_dependencies = ["tests"] # to load extras from [project.optional-depend # MIT license: https://pypi.org/project/ordereddict/1.1/ 
ordereddict = "1.1" -# MIT license: https://pypi.org/project/pendulum/1.4.4/ -pendulum = ">=1.4.4" +# MIT License: https://pypi.org/project/pendulum/3.0.0/ +pendulum = ">=3.0.0" # Python Imaging Library (PIL) License: https://github.com/python-pillow/Pillow/blob/master/LICENSE Pillow = ">=7.2.0" From e2caaf7c41604858a94c9a545a42552bf35655b3 Mon Sep 17 00:00:00 2001 From: keeagnsmith21 Date: Tue, 28 May 2024 13:35:23 +0800 Subject: [PATCH 3/5] dateutil -> python-dateutil --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ee6ac830f..5bf1bae75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ dependencies = [ "validators<=0.20.0", "xmltodict", "tenacity", - "dateutil", + "python-dateutil", ] [project.optional-dependencies] From f9eada339813e49fe9096d99fb9f98cb894e3c31 Mon Sep 17 00:00:00 2001 From: keeagnsmith21 Date: Wed, 29 May 2024 08:31:59 +0800 Subject: [PATCH 4/5] Revert pendulum version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5bf1bae75..ac53f14b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -144,8 +144,8 @@ optional_dependencies = ["tests"] # to load extras from [project.optional-depend # MIT license: https://pypi.org/project/ordereddict/1.1/ ordereddict = "1.1" -# MIT License: https://pypi.org/project/pendulum/3.0.0/ -pendulum = ">=3.0.0" +# MIT license: https://pypi.org/project/pendulum/1.4.4/ +pendulum = ">=1.4.4" # Python Imaging Library (PIL) License: https://github.com/python-pillow/Pillow/blob/master/LICENSE Pillow = ">=7.2.0" From 314e7c7af090c1de0c8386dcd2b7c9b32bf62431 Mon Sep 17 00:00:00 2001 From: keeagnsmith21 Date: Wed, 29 May 2024 10:03:31 +0800 Subject: [PATCH 5/5] Updated fn to parse datetime objects as well --- observatory_platform/date_utils.py | 9 ++++-- observatory_platform/tests/test_date_utils.py | 31 +++++++++++++++---- 2 files changed, 31 insertions(+), 9 
deletions(-) diff --git a/observatory_platform/date_utils.py b/observatory_platform/date_utils.py index 43dd9a8cc..bc55df23b 100644 --- a/observatory_platform/date_utils.py +++ b/observatory_platform/date_utils.py @@ -14,19 +14,22 @@ # Author: Keegan Smith +from datetime import datetime +from typing import Union from zoneinfo import ZoneInfo from dateutil import parser -def normalise_datetime(dt_string: str) -> str: +def datetime_normalise(dt: Union[str, datetime]) -> str: """ - Converts a date or datetime string to an isoformatted datetime string at +0000UTC + Converts a datetime object or string to an isoformatted datetime string at +0000UTC :param dt_string: The string to convert :return: The ISO formatted datetime string """ - dt = parser.parse(dt_string) # Parse string to datetime object + if isinstance(dt, str): + dt = parser.parse(dt) # Parse string to datetime object if not dt.utcoffset(): # If no timezone present, assume +0000UTC dt = dt.replace(tzinfo=ZoneInfo("UTC")) dt = dt.astimezone(ZoneInfo("UTC")) diff --git a/observatory_platform/tests/test_date_utils.py b/observatory_platform/tests/test_date_utils.py index 916eda479..c8640b0f9 100644 --- a/observatory_platform/tests/test_date_utils.py +++ b/observatory_platform/tests/test_date_utils.py @@ -14,15 +14,17 @@ # Author: Keegan Smith +from datetime import datetime import unittest +from zoneinfo import ZoneInfo -from observatory_platform.date_utils import normalise_datetime +from observatory_platform.date_utils import datetime_normalise class test_normalise_datetime(unittest.TestCase): """Tests for normalise_datetime""" - def test_good_inputs(self): + def test_str_inputs(self): inputs = [ "2024-01-01 12:00:00+0000", "2024-01-01 00:00:00+0800", @@ -38,12 +40,29 @@ def test_good_inputs(self): "2024-01-01T13:00:00+00:00", ] for input, expected_output in zip(inputs, expected_outputs): - actual_output = normalise_datetime(input) + actual_output = datetime_normalise(input) + self.assertEqual(expected_output, 
actual_output) + + def test_dt_inputs(self): + inputs = [ + datetime(2024, 1, 1, 12, 0, 0, tzinfo=ZoneInfo("UTC")), + datetime(2024, 1, 1, 12, 0, 0, tzinfo=ZoneInfo("Etc/GMT+1")), + datetime(2024, 1, 1, 0, 0, 0, tzinfo=ZoneInfo("Etc/GMT-1")), + datetime(2023, 12, 31, 23, 0, 0, tzinfo=ZoneInfo("Etc/GMT+1")), + ] + expected_outputs = [ + "2024-01-01T12:00:00+00:00", + "2024-01-01T13:00:00+00:00", + "2023-12-31T23:00:00+00:00", + "2024-01-01T00:00:00+00:00", + ] + for input, expected_output in zip(inputs, expected_outputs): + actual_output = datetime_normalise(input) self.assertEqual(expected_output, actual_output) def test_missing_tz(self): - inputs = ["2024-01-01 00:00:00", "2024-01-01T12:00:00"] - expected_outputs = ["2024-01-01T00:00:00+00:00", "2024-01-01T12:00:00+00:00"] + inputs = ["2024-01-01 00:00:00", "2024-01-01T12:00:00", datetime(2024, 1, 1, 12, 0, 0)] + expected_outputs = ["2024-01-01T00:00:00+00:00", "2024-01-01T12:00:00+00:00", "2024-01-01T12:00:00+00:00"] for input, expected_output in zip(inputs, expected_outputs): - actual_output = normalise_datetime(input) + actual_output = datetime_normalise(input) self.assertEqual(expected_output, actual_output)