diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 7ba6a9f..572a9ee 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -19,11 +19,11 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
         os: ["ubuntu-latest"]
         include:
           - os: "windows-latest"
-            python-version: '3.10'
+            python-version: '3.11'
     steps:
       - uses: actions/checkout@v2
         with:
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 7f5ea00..8474758 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,6 +8,7 @@ Unreleased changes in master branch
 - Added method to NetworkCollection to export metadata as (geo)json.
 - Added more options when plotting the station overview map.
 - Network citation list updated.
+- Faster ISMN_Interface generation for large datasets by skipping some checks.
 
 Version 1.3.4
 =============
diff --git a/environment.yml b/environment.yml
index 4093a6a..29bcda2 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,6 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
+  - python>3.7,<3.12
   - numpy
   - pandas
   - matplotlib
diff --git a/src/ismn/components.py b/src/ismn/components.py
index 5e93674..219d0a4 100644
--- a/src/ismn/components.py
+++ b/src/ismn/components.py
@@ -823,7 +823,8 @@ def export_citations(self, out_file=None):
 
         return refs
 
-    def export_geojson(self, path, extra_props=None, **filter_kwargs):
+    def export_geojson(self, path, network=True, station=True, sensor=False,
+                       depth=True, extra_props=None, **filter_kwargs):
         """
         Filter sensors in collection and create geojson file containing
         all features.
@@ -832,12 +833,22 @@ def export_geojson(self, path, extra_props=None, **filter_kwargs):
         ----------
         path: str
             Path to geojson file
-        extra_props: list[str]
-            List of extra properties to include in geojson file
+        network: bool, optional (default: True)
+            If True, network names are included in geojson file
+        station: bool, optional (default: True)
+            If True, station names are included in geojson file
+        sensor: bool, optional (default: False)
+            If True, sensor names are included in geojson file
+        depth: bool, optional (default: True)
+            If True, depth_from and depth_to are included in geojson file
+        extra_props: list[str], optional (default: None)
+            List of extra properties from sensor metadata to include in
+            geojson file.
             By default only depth_from and depth_to are included
             e.g. ['timerange_from', 'timerange_to', 'variable', 'frm_class']
         filter_kwargs:
             Keyword arguments to filter sensors in collection
+            see :func:`ismn.components.Sensor.eval`
         """
         extra_props = extra_props or []
         geoinfo = {
@@ -845,42 +856,61 @@
             "type": "FeatureCollection",
             "features": [],
         }
 
-        for nw in self.iter_networks():
+        for nw, stat, sens in self.iter_sensors(**filter_kwargs):
             feature = {
                 "type": "Feature",
                 "geometry": {
-                    "type": "MultiPoint",
-                    "coordinates": [],
-                    "properties": {
-                        "datasetName": nw.name,
-                        "datasetVersion": 1,
-                        "datasetProperties": [],
-                    }
+                    "type": "Point",
+                    "coordinates": [
+                        stat.lon,
+                        stat.lat
+                    ],
+                },
+                "properties": {
+                    "markerColor": "#00aa00",
+                    "datasetProperties": []
                 }
             }
-            for station, sensor in nw.iter_sensors(**filter_kwargs):
-                feature["geometry"]["coordinates"].append([
-                    station.lon,
-                    station.lat
-                ])
-
-                feature["geometry"]["properties"]["datasetProperties"] += [
+            if network:
+                feature["properties"]["datasetProperties"] += [
+                    {
+                        "propertyName": "network",
+                        "propertyValue": nw.name
+                    }
+                ]
+            if station:
+                feature["properties"]["datasetProperties"] += [
+                    {
+                        "propertyName": "station",
+                        "propertyValue": stat.name
+                    }
+                ]
+            if sensor:
+                feature["properties"]["datasetProperties"] += [
+                    {
+                        "propertyName": "sensor",
+                        "propertyValue": sens.name
+                    }
+                ]
+            if depth:
+                feature["properties"]["datasetProperties"] += [
                     {
                         "propertyName": "depth_from",
-                        "propertyValue": sensor.depth[0]
+                        "propertyValue": str(sens.depth[0])
                     },
                     {
                         "propertyName": "depth_to",
-                        "propertyValue": sensor.depth[1]
-                    },
+                        "propertyValue": str(sens.depth[1])
+                    }
+                ]
+
+            for prop in extra_props:
+                feature["properties"]["datasetProperties"] += [
+                    {
+                        "propertyName": prop,
+                        "propertyValue": str(sens.metadata[prop].val),
+                    }
                 ]
-                for prop in extra_props:
-                    feature["geometry"]["properties"]["datasetProperties"] += [
-                        {
-                            "propertyName": prop,
-                            "propertyValue": str(sensor.metadata[prop].val),
-                        }
-                    ]
 
             geoinfo["features"].append(feature)
diff --git a/src/ismn/filecollection.py b/src/ismn/filecollection.py
index ddcbea0..d13cb16 100644
--- a/src/ismn/filecollection.py
+++ b/src/ismn/filecollection.py
@@ -22,9 +22,9 @@
 
 import logging
+import os
 from tempfile import gettempdir
 from pathlib import Path, PurePosixPath
-
 import numpy as np
 from tqdm import tqdm
 from typing import Union
@@ -32,9 +32,11 @@
 from operator import itemgetter
 import time
 from typing import Tuple
+import pandas as pd
+from collections import OrderedDict
 
 from ismn.base import IsmnRoot
-from ismn.const import *
+import ismn.const as const
 from ismn.filehandlers import DataFile, StaticMetaFile
 from ismn.meta import MetaData, MetaVar, Depth
@@ -60,8 +62,9 @@ def _read_station_dir(
 
     try:
         if len(csv) == 0:
-            raise IsmnFileError("Expected 1 csv file for station, found 0. "
-                                "Use empty static metadata.")
+            raise const.IsmnFileError(
+                "Expected 1 csv file for station, found 0. "
" + "Use empty static metadata.") else: if len(csv) > 1: infos.append( @@ -70,10 +73,10 @@ def _read_station_dir( static_meta_file = StaticMetaFile( root, csv[0], load_metadata=True, temp_root=temp_root) station_meta = static_meta_file.metadata - except IsmnFileError as e: + except const.IsmnFileError as e: infos.append(f"Error loading static meta for station: {e}") station_meta = MetaData( - [MetaVar(k, v) for k, v in CSV_META_TEMPLATE.items()]) + [MetaVar(k, v) for k, v in const.CSV_META_TEMPLATE.items()]) data_files = root.find_files(stat_dir, "*.stm") @@ -345,9 +348,11 @@ def from_metadata_df(cls, data_root, metadata_df, temp_root=gettempdir()): f = DataFile( root=root, - file_path=str(PurePosixPath(row[-2])), + file_path=Path(str(PurePosixPath(row[-2]))), load_metadata=False, temp_root=temp_root, + verify_filepath=False, + verify_temp_root=False, ) f.metadata = metadata diff --git a/src/ismn/filehandlers.py b/src/ismn/filehandlers.py index 3a8aff1..0509b1b 100644 --- a/src/ismn/filehandlers.py +++ b/src/ismn/filehandlers.py @@ -23,17 +23,18 @@ import os import pandas as pd import warnings +import numpy as np +from tempfile import gettempdir, TemporaryDirectory +from pathlib import Path +from typing import Tuple, Union +import logging warnings.simplefilter(action="ignore", category=UserWarning) from ismn.base import IsmnRoot -from ismn.components import * from ismn import const from ismn.const import IsmnFileError -from ismn.meta import MetaVar, MetaData +from ismn.meta import MetaVar, MetaData, Depth -from tempfile import gettempdir, TemporaryDirectory -from pathlib import Path -from typing import Tuple class IsmnFile(object): """ @@ -50,9 +51,14 @@ class IsmnFile(object): Temporary directory metadata : MetaData File MetaData collection + verify_filepath : bool + Switch to activate file path verification + verify_temp_root : bool + Switch to activate temp root verification """ - def __init__(self, root, file_path, temp_root=gettempdir()): + def __init__(self, root, file_path, temp_root=gettempdir(), + verify_filepath=True, verify_temp_root=True): """ Parameters ---------- @@ -63,18 +69,27 @@ def __init__(self, root, file_path, temp_root=gettempdir()): temp_root : Path or str, optional (default : gettempdir()) Root directory where a separate subdir for temporary files will be created (and deleted). 
+        verify_filepath: bool, optional (default: True)
+            Check if subpath is a valid path and adapt to archive format and os
+        verify_temp_root: bool, optional (default: True)
+            Check if temp_root is a valid path and create it if necessary
         """
         if not isinstance(root, IsmnRoot):
             root = IsmnRoot(root)
 
         self.root = root
-        self.file_path = self.root.clean_subpath(file_path)
 
-        if self.file_path not in self.root:
-            raise IOError(f"Archive does not contain file: {self.file_path}")
+        if verify_filepath:
+            self.file_path = self.root.clean_subpath(file_path)
+
+            if self.file_path not in self.root:
+                raise IOError(
+                    f"Archive does not contain file: {self.file_path}")
+        else:
+            self.file_path = file_path
 
-        if not os.path.exists(temp_root):
-            os.makedirs(temp_root, exist_ok=True)
+        if verify_temp_root:
+            if not os.path.exists(temp_root):
+                os.makedirs(temp_root, exist_ok=True)
 
         self.temp_root = temp_root
@@ -353,7 +368,9 @@ def __init__(self,
                  root,
                  file_path,
                  load_metadata=True,
-                 temp_root=gettempdir()):
+                 temp_root=gettempdir(),
+                 *args,
+                 **kwargs):
         """
         Parameters
         ----------
@@ -366,9 +383,14 @@
         temp_root : Path or str, optional (default : gettempdir())
             Root directory where a separate subdir for temporary files
             will be created (and deleted).
+        verify_filepath: bool, optional (default: True)
+            Check if subpath is a valid path and adapt to archive format and os
+        verify_temp_root: bool, optional (default: True)
+            Check if temp_root is a valid path and create it if necessary
         """
-        super(DataFile, self).__init__(root, file_path, temp_root)
+        super(DataFile, self).__init__(root, file_path, temp_root,
+                                       *args, **kwargs)
 
         self.file_type = "undefined"
         self.posix_path = file_path
diff --git a/src/ismn/interface.py b/src/ismn/interface.py
index 63eedb0..c722d4c 100644
--- a/src/ismn/interface.py
+++ b/src/ismn/interface.py
@@ -21,18 +21,29 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+import os
+import numpy as np
 from pathlib import Path
 from tempfile import gettempdir
 import platform
 import sys
 import pandas as pd
+from collections import OrderedDict
+from collections.abc import Iterable
+from typing import Union
+import warnings
 
+from ismn.components import NetworkCollection, Network
 from ismn.filecollection import IsmnFileCollection
-from collections.abc import Iterable
-from ismn.components import *
-from ismn.const import *
+from ismn.meta import Depth
 from ismn.base import IsmnRoot
-
+from ismn.const import (
+    ISMNError,
+    KOEPPENGEIGER,
+    LANDCOVER,
+    deprecated,
+    CSV_META_TEMPLATE_SURF_VAR
+)
 
 try:
     import cartopy.crs as ccrs
     import cartopy.feature as cfeature
diff --git a/src/ismn/meta.py b/src/ismn/meta.py
index 7740f63..4dc0bcf 100644
--- a/src/ismn/meta.py
+++ b/src/ismn/meta.py
@@ -23,7 +23,8 @@
 
 from typing import Optional, List, Any, Union
 import pandas as pd
-from ismn.const import *
+import numpy as np
+import ismn.const as const
 
 
 class Depth:
@@ -63,11 +64,12 @@ def __init__(self, start, end):
 
         if self.across0:
             if self.start > 0:
-                raise DepthError("Start must be negative for Depths across 0")
+                raise const.DepthError(
+                    "Start must be negative for Depths across 0")
         else:
             if abs(start) > abs(end):
-                raise DepthError("Depth end can not be further from 0"
-                                 " than depth start")
+                raise const.DepthError(
+                    "Depth end can not be further from 0 than depth start")
 
     @property
     def is_profile(self) -> bool:
@@ -288,7 +290,7 @@ def __init__(self, name: str, val: Any, depth: Depth = None):
 
     def __repr__(self):
         return (
             f"{self.__class__.__name__}([{self.name}, {self.val}, "
-            f"{None.__repr__() if not self.depth else self.depth.__repr__()}])"
+            f"{str(None) if not self.depth else self.depth.__repr__()}])"
         )
 
     def __getitem__(self, item: int):
@@ -565,7 +567,7 @@ def replace(self, name, val, depth=None):
             self.metadata.remove(Var)
             self.metadata.append(MetaVar(name, val, depth))
         else:
-            raise MetadataError(
+            raise const.MetadataError(
                 "There is no MetaVar with name '{}'".format(name))
 
     def best_meta_for_depth(self, depth):
diff --git a/tests/test_components.py b/tests/test_components.py
index 84ef255..c6e7eaf 100644
--- a/tests/test_components.py
+++ b/tests/test_components.py
@@ -78,13 +78,19 @@ def test_references(self):
 
     def test_json_dump(self):
         with TemporaryDirectory() as temp:
-            self.netcol.export_geojson(os.path.join(temp, "meta.json"))
+            self.netcol.export_geojson(os.path.join(temp, "meta.json"),
+                                       sensor=True)
+
             with open(os.path.join(temp, "meta.json")) as f:
                 meta_dict = json.load(f)
             for feature in meta_dict['features']:
-                net = feature['geometry']['properties']['datasetName']
-                assert self.netcol[net][0][0].depth[0] == 0.5
-                assert self.netcol[net][0][0].depth[1] == 1.0
+                net_name = feature['properties']['datasetProperties'][0]['propertyValue']
+                station_name = feature['properties']['datasetProperties'][1]['propertyValue']
+                sensor_name = feature['properties']['datasetProperties'][2]['propertyValue']
+                depth_from = feature['properties']['datasetProperties'][3]['propertyValue']
+                depth_to = feature['properties']['datasetProperties'][4]['propertyValue']
+                assert str(self.netcol[net_name][station_name][sensor_name].depth[0]) == depth_from
+                assert str(self.netcol[net_name][station_name][sensor_name].depth[1]) == depth_to
 
 
 class NetworkTest(unittest.TestCase):
     def setUp(self):
diff --git a/tests/test_filecollection.py b/tests/test_filecollection.py
index 1c1aca9..f8dcfb9 100644
--- a/tests/test_filecollection.py
+++ b/tests/test_filecollection.py
@@ -107,9 +107,12 @@ def test_from_csv(self):
         for thisfile, otherfile in zip(
             self.coll.iter_filehandlers(), other.iter_filehandlers()
         ):
-            assert thisfile.file_path == otherfile.file_path, "Paths dont match"
-            assert thisfile.root.path == otherfile.root.path, "Paths dont match"
-            assert thisfile.metadata == otherfile.metadata, "Meta dont match"
+            assert Path(thisfile.file_path) == Path(otherfile.file_path), \
+                "Paths dont match"
+            assert Path(thisfile.root.path) == Path(otherfile.root.path), \
+                "Paths dont match"
+            assert thisfile.metadata == otherfile.metadata, \
+                "Meta dont match"
 
 
 class Test_FileCollectionHeaderValuesUnzipped(Test_FileCollectionCeopSepUnzipped):
@@ -160,3 +163,4 @@ def setUpClass(cls):
         cleanup(metadata_path)
 
         cls.coll = IsmnFileCollection.build_from_scratch(testdata_zip_path)
+
diff --git a/tests/test_interface.py b/tests/test_interface.py
index 9d0d319..dd1d2f6 100644
--- a/tests/test_interface.py
+++ b/tests/test_interface.py
@@ -17,10 +17,9 @@ def test_metadata_dataframe():
     # make sure that metadata.index represents same values as get_dataset_ids
-    testdata = os.path.join(testdata_root, "Data_seperate_files_20170810_20180809")
-    metadata_path = os.path.join(testdata, "python_metadata")
-    cleanup(metadata_path)
-    ds_one = ISMN_Interface(testdata, meta_path=metadata_path, network='FR_Aqui')
+    with TemporaryDirectory() as metadata_path:
+        testdata = os.path.join(testdata_root, "Data_seperate_files_20170810_20180809")
+        ds_one = ISMN_Interface(testdata, meta_path=metadata_path, network='FR_Aqui')
 
     assert np.all(ds_one.metadata.index.values ==
                   ds_one.get_dataset_ids(None, -np.inf, np.inf))
 
     ids = ds_one.get_dataset_ids('soil_moisture')
@@ -333,6 +332,3 @@ def setUp(self) -> None:
 
         self.ds = ISMN_Interface(self.testdata_zip_path)
-
-
-if __name__ == "__main__":
-    unittest.main()
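
Usage note (not part of the patch): a minimal sketch of how the extended
export_geojson() keywords and the new per-sensor GeoJSON layout introduced
above fit together. The archive path is a placeholder, and reaching the
NetworkCollection through the usual ISMN_Interface.collection attribute is
assumed here.

    import json
    from ismn.interface import ISMN_Interface

    ds = ISMN_Interface("/path/to/ISMN_archive")  # placeholder path
    netcol = ds.collection  # NetworkCollection built by the interface

    # One Point feature is written per sensor; each boolean flag toggles
    # one entry in the flat "datasetProperties" list (sensor names are
    # off by default).
    netcol.export_geojson(
        "meta.json",
        network=True,
        station=True,
        sensor=True,
        depth=True,
    )

    with open("meta.json") as f:
        geo = json.load(f)

    # Read the values back out of the propertyName/propertyValue pairs.
    for feature in geo["features"]:
        lon, lat = feature["geometry"]["coordinates"]
        props = {p["propertyName"]: p["propertyValue"]
                 for p in feature["properties"]["datasetProperties"]}
        print(lon, lat, props["network"], props["station"], props["sensor"])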