Merge 8765482 into 983f37a

wpreimes committed Jan 22, 2024
2 parents 983f37a + 8765482 commit 3827017
Showing 11 changed files with 171 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -5,6 +5,7 @@ Changelog
Unreleased changes in master branch
===================================

+ - Added `ismn collect_metadata` and `ismn export_geojson` CLI programs.
- Added method to NetworkCollection to export metadata as (geo)json (see the sketch after this list).
- Added more options when plotting the station overview map.
- Network citation list updated.
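
A minimal sketch of the new export method, assuming a local archive at a hypothetical path (the method name and the `markercolor` keyword appear in the `components.py` diff below):

from ismn.interface import ISMN_Interface

ds = ISMN_Interface("/tmp/ISMN_archive.zip")  # hypothetical archive path
# write all sensor locations to a geojson file, as the new CLI command does
ds.collection.export_geojson("/tmp/ismn_sensors.json", markercolor="#00aa00")
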
10 changes: 5 additions & 5 deletions docs/examples/interface.ipynb
@@ -92,7 +92,7 @@
"\n",
"# Either a .zip file or one folder that contains all networks, here we read from .zip\n",
"data_path = \"/tmp/Data_separate_files_header_20090101_20201231_9289_Cwpc_20221201.zip\"\n",
"ismn_data = ISMN_Interface(data_path)"
"ismn_data = ISMN_Interface(data_path, parallel=False)"
]
},
{
@@ -1510,7 +1510,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1mMetadata for sensor 5TE_soil_moisture_0.050000_0.050000:\u001b[0m\n"
"\u001B[1mMetadata for sensor 5TE_soil_moisture_0.050000_0.050000:\u001B[0m\n"
]
},
{
@@ -1672,7 +1672,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1mMetadata for sensor QMR102_precipitation_-1.400000_-1.400000:\u001b[0m\n"
"\u001B[1mMetadata for sensor QMR102_precipitation_-1.400000_-1.400000:\u001B[0m\n"
]
},
{
@@ -1828,7 +1828,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1mMetadata for sensor 5TM_soil_moisture_0.000000_0.050000:\u001b[0m\n"
"\u001B[1mMetadata for sensor 5TM_soil_moisture_0.000000_0.050000:\u001B[0m\n"
]
},
{
@@ -2009,4 +2009,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
- }
+ }
3 changes: 2 additions & 1 deletion environment.yml
@@ -3,7 +3,7 @@ channels:
  - conda-forge
  - defaults
dependencies:
-  - python>3.7,<3.12
+  # - python>3.7,<3.12
  - numpy
  - pandas
  - matplotlib
@@ -14,6 +14,7 @@ dependencies:
  - pygeogrids>=0.3.2
  - configparser
  - tqdm
+  - click
  - more_itertools
  - sphinx
  - nbsphinx
3 changes: 3 additions & 0 deletions setup.cfg
@@ -34,6 +34,7 @@ install_requires =
    pygeogrids>=0.3.2
    numpy
    pandas
+   click
    configparser
    more_itertools
    tqdm
@@ -67,6 +68,8 @@ testing =
# And any other entry points, for example:
# pyscaffold.cli =
#     awesome = pyscaffoldext.awesome.extension:AwesomeExtension
+console_scripts =
+    ismn = ismn.cli:ismn

[test]
# py.test options when running `python setup.py test`
83 changes: 83 additions & 0 deletions src/ismn/cli.py
@@ -0,0 +1,83 @@
import os
import click
from ismn.interface import ISMN_Interface


@click.command("collect_metadata", short_help="Collect all ISMN metadata.")
@click.argument('data_path', type=click.STRING)
@click.option('--meta_path', type=click.Path(writable=True), default=None,
              help="Directory where the metadata should be stored. The file "
                   "will be created automatically. Existing metadata in this "
                   "directory will be replaced! If not specified, "
                   "we use DATA_PATH.")
@click.option('--parallel', '-p', is_flag=True, show_default=True,
              default=False,
              help="Pass this flag to activate parallel metadata collection "
                   "(recommended for large archives). Deactivated by default.")
def collect_metadata(data_path, meta_path, parallel):
    """
    Command line program to initialise ISMN metadata collection.
    THIS WILL OVERWRITE ANY EXISTING METADATA!

    \b
    DATA_PATH: string
        Path where the downloaded ISMN archive is stored. This is either
        - The downloaded ISMN ZIP archive or
        - A directory with network folders extracted from the ZIP archive.
        ISMN data can be downloaded from https://ismn.earth after registration.
    """
    # The docstring above is slightly different to the normal python one to
    # display it properly on the command line.
    if not os.path.exists(data_path):
        raise ValueError("The passed DATA_PATH does not exist.")
    if meta_path is not None:
        os.makedirs(meta_path, exist_ok=True)
    _ = ISMN_Interface(data_path, force_metadata_collection=True,
                       meta_path=meta_path, parallel=parallel)


@click.command("export_geojson", short_help="Export ISMN sensors to geojson.")
@click.argument('data_path', type=click.STRING)
@click.option('--file_out',
              type=click.STRING, default=None,
              help="Path to the json file that should be created. "
                   "If the file already exists it will be overwritten. "
                   "If not specified, a file called `ismn_sensors.json` "
                   "is created in DATA_PATH.")
@click.option('--markercolor', '-m',
              type=click.STRING, default='"#00aa00"', show_default=True,
              help='Hex color (USE QUOTES!, e.g. "#00aa00") to assign to '
                   'markers in the json file. The default color is green.')
def export_geojson(data_path, file_out, markercolor):
    """
    Command line program to export the location and metadata of all sensors
    in an ISMN archive to a geojson file.

    \b
    DATA_PATH: string
        Path where the downloaded ISMN archive is stored. This is either
        - The downloaded ISMN ZIP archive or
        - A directory with network folders extracted from the ZIP archive.
        ISMN data can be downloaded from https://ismn.earth after registration.
    """
    # The docstring above is slightly different to the normal python one to
    # display it properly on the command line.
    markercolor = markercolor.replace('"', '').replace("'", "")
    if not os.path.exists(data_path):
        raise ValueError("The passed DATA_PATH does not exist.")
    ds = ISMN_Interface(data_path)
    if file_out is None:
        file_out = os.path.join(ds.root.root_dir, 'ismn_sensors.json')
    os.makedirs(os.path.dirname(file_out), exist_ok=True)
    print(f"Exporting geojson to: {file_out}")
    ds.collection.export_geojson(file_out, markercolor=markercolor)


@click.group(short_help="ISMN Command Line Programs.")
def ismn():
    pass


ismn.add_command(collect_metadata)
ismn.add_command(export_geojson)
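
Once installed, the console_scripts entry point in setup.cfg exposes these commands as `ismn collect_metadata` and `ismn export_geojson`. A minimal sketch of driving them from Python with click's test runner (archive path hypothetical), mirroring tests/test_cli.py below:

from click.testing import CliRunner
from ismn.cli import collect_metadata, export_geojson

runner = CliRunner()
# collect metadata for a local archive (path hypothetical), in parallel
runner.invoke(collect_metadata, ["/tmp/ISMN_archive.zip", "-p"])
# export all sensor locations with a custom marker color
runner.invoke(export_geojson, ["/tmp/ISMN_archive.zip", "-m", '"#ff0000"'])
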
6 changes: 4 additions & 2 deletions src/ismn/components.py
@@ -19,6 +19,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+import os.path

from pygeogrids import BasicGrid
from typing import Union
@@ -823,7 +824,8 @@ def export_citations(self, out_file=None):

        return refs

-    def export_geojson(self, path, network=True, station=True, sensor=False,
+    def export_geojson(self, path, markercolor="#00aa00",
+                       network=True, station=True, sensor=False,
                        depth=True, extra_props=None, **filter_kwargs):
        """
        Filter sensors in collection and create geojson file containing all
@@ -867,7 +869,7 @@
                    ],
                },
                "properties": {
-                   "markerColor": "#00aa00",
+                   "markerColor": markercolor,
                    "datasetProperties": []
                }
            }
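
To illustrate the new parameter, a small sketch that inspects an exported file (path hypothetical, and assuming the standard geojson FeatureCollection layout):

import json

# hypothetical path to a file produced by export_geojson
with open("/tmp/ismn_sensors.json") as f:
    geojson = json.load(f)

# each exported feature carries the color passed via `markercolor`
for feature in geojson["features"]:
    assert feature["properties"]["markerColor"] == "#00aa00"
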
12 changes: 9 additions & 3 deletions src/ismn/filecollection.py
@@ -243,10 +243,16 @@ def build_from_scratch(

        logging.info(f"Collecting metadata with {n_proc} processes.")

+       if not parallel:
+           hint = 'Hint: Use `parallel=True` to speed up metadata ' \
+                  'generation for large datasets'
+       else:
+           hint = ''
+
        print(
-           f"Processing metadata for all ismn stations into folder {root.path}.\n"
-           f"This may take a few minutes, but is only done once..."
-           f"\n{'Hint: Use `parallel=True` to speed up metadata generation for large datasets' if not parallel else ''}"
+           f"Processing metadata for all ismn stations into folder "
+           f"{root.path}.\n"
+           f"This may take a few minutes, but is only done once...\n{hint}"
        )

        process_stat_dirs = []
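
For reference, a minimal sketch of the call the hint recommends (archive path hypothetical):

from ismn.interface import ISMN_Interface

# the first call builds the metadata; parallel=True uses multiple processes
ds = ISMN_Interface("/tmp/ISMN_archive.zip", parallel=True)
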
5 changes: 2 additions & 3 deletions src/ismn/filehandlers.py
@@ -623,9 +623,8 @@ def __read_format_header_values(self) -> pd.DataFrame:
            names=names,
            usecols=[0, 1, 2, 3, 4],
            skiprows=1,
-           #sep=" ",
+           sep=r'\s+',
            low_memory=False,
-           delim_whitespace=True,
        )

    def __read_csv(self, names=None, usecols=None, skiprows=0, **kwargs):
@@ -672,7 +671,7 @@ def readf(
            skiprows=skiprows,
            usecols=usecols,
            names=names,
-           delim_whitespace=True,
+           sep=r'\s+',
            parse_dates=parse_dates,
            engine="c",
            **kwargs
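
Background: recent pandas releases deprecate `delim_whitespace=True` in favour of the equivalent `sep=r'\s+'`, which this change adopts. A small sketch of the equivalence:

import io
import pandas as pd

raw = io.StringIO("a b  c\n1 2 3\n")
# sep=r'\s+' splits on any run of whitespace, replicating the old
# delim_whitespace=True behaviour without the deprecation warning
df = pd.read_csv(raw, sep=r'\s+')
print(df.columns.tolist())  # ['a', 'b', 'c']
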
10 changes: 8 additions & 2 deletions src/ismn/interface.py
@@ -95,6 +95,9 @@ class ISMN_Interface:
        Additional readers to collect station/sensor metadata
        from external sources e.g. csv files.
        See :class:`ismn.custom.CustomMetaReader`.
+   force_metadata_collection: bool, optional (default: False)
+       If True, run metadata collection and replace any existing metadata
+       that would otherwise be re-used.

    Raises
    ------
@@ -141,6 +144,7 @@ def __init__(
        keep_loaded_data=False,
        temp_root=gettempdir(),
        custom_meta_reader=None,
+       force_metadata_collection=False,
    ):
        self.climate, self.landcover = KOEPPENGEIGER, LANDCOVER
        self.parallel = parallel
@@ -150,6 +154,7 @@
        self.keep_loaded_data = keep_loaded_data

        self.custom_meta_reader = custom_meta_reader
+       self.force_metadata_collection = force_metadata_collection

        self.meta_path = meta_path
        self.temp_root = temp_root
Expand Down Expand Up @@ -178,7 +183,7 @@ def activate_network(

        meta_csv_file = meta_path / meta_csv_filename

-       if not os.path.isfile(meta_csv_file):
+       if not os.path.isfile(meta_csv_file) or self.force_metadata_collection:
            self.__file_collection = IsmnFileCollection.build_from_scratch(
                self.root,
                parallel=self.parallel,
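
A minimal usage sketch of the new flag (paths hypothetical); this is the call that the `ismn collect_metadata` command above issues:

from ismn.interface import ISMN_Interface

# rebuild the metadata csv even if one already exists in meta_path
ds = ISMN_Interface("/tmp/ISMN_archive.zip", meta_path="/tmp/ismn_meta",
                    force_metadata_collection=True)
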
Expand Down Expand Up @@ -570,7 +575,8 @@ def read_ts(self, idx, return_meta=False):
            m = pd.DataFrame(data={i: m})
            metadata.append(m)

-       data = pd.concat(data, axis=1)
+       # would it make more sense to concat along time dimension?
+       data = pd.concat(data, axis=1).sort_index()

        if return_meta:
            meta = pd.concat(metadata, axis=1)
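
For context, a small sketch (invented series) of why the added `.sort_index()` helps when sensor series are combined column-wise:

import pandas as pd

a = pd.Series([1.0, 2.0], name="sensor_a",
              index=pd.to_datetime(["2020-01-02", "2020-01-03"]))
b = pd.Series([3.0], name="sensor_b",
              index=pd.to_datetime(["2020-01-01"]))

# axis=1 aligns the series on the union of their time stamps;
# sort_index() guarantees the combined frame is in chronological order
df = pd.concat([a, b], axis=1).sort_index()
assert df.index.is_monotonic_increasing
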
34 changes: 34 additions & 0 deletions tests/test_cli.py
@@ -0,0 +1,34 @@
import os
from click.testing import CliRunner
from ismn.cli import collect_metadata, export_geojson
from tempfile import TemporaryDirectory

testdata_root = os.path.join(os.path.dirname(__file__), "test_data")


def test_cli_meta_collect():
    with TemporaryDirectory() as tempdir:
        data_path = os.path.join(
            testdata_root, "zip_archives", "ceop",
            "Data_seperate_files_20170810_20180809.zip")
        runner = CliRunner()
        result = runner.invoke(collect_metadata,
                               [data_path, "--meta_path", tempdir, "-p"])
        assert result.exit_code == 0
        assert os.path.isfile(os.path.join(
            tempdir, "Data_seperate_files_20170810_20180809.csv"))


def test_cli_export_geojson():
    with TemporaryDirectory() as tempdir:
        data_path = os.path.join(
            testdata_root, "zip_archives", "ceop",
            "Data_seperate_files_20170810_20180809.zip")
        runner = CliRunner()
        result = runner.invoke(export_geojson,
                               [data_path, "--file_out",
                                os.path.join(tempdir, "test.geojson"),
                                "-m", "testcolor"])
        assert result.exit_code == 0
        assert os.path.isfile(os.path.join(tempdir, "test.geojson"))
        with open(os.path.join(tempdir, "test.geojson"), "r") as f:
            content = f.readlines()
        assert "testcolor" in content[0]
30 changes: 20 additions & 10 deletions tests/test_interface.py
@@ -5,6 +5,7 @@
from datetime import datetime

import numpy as np
+import pandas as pd
import pytest
import logging
from collections import OrderedDict
@@ -19,7 +20,8 @@ def test_metadata_dataframe():
    # make sure that metadata.index represents same values as get_dataset_ids
    with TemporaryDirectory() as metadata_path:
        testdata = os.path.join(testdata_root, "Data_seperate_files_20170810_20180809")
-       ds_one = ISMN_Interface(testdata, meta_path=metadata_path, network='FR_Aqui')
+       ds_one = ISMN_Interface(testdata, meta_path=metadata_path, network='FR_Aqui',
+                               force_metadata_collection=True)

        assert np.all(ds_one.metadata.index.values == ds_one.get_dataset_ids(None, -np.inf, np.inf))
        ids = ds_one.get_dataset_ids('soil_moisture')
@@ -37,7 +39,8 @@ def setUpClass(cls):
        metadata_path = os.path.join(testdata, "python_metadata")

        cleanup(metadata_path)
-       ds = ISMN_Interface(testdata, network=[], parallel=True)
+       ds = ISMN_Interface(testdata, network=[], parallel=True,
+                           force_metadata_collection=False)
        assert ds.networks == OrderedDict()
        cls.testdata = testdata
@@ -56,8 +59,9 @@ def test_list(self):
        assert len(self.ds.list_sensors(station="Barrow-ARM")) == 1

    def test_network_for_station(self):
-       assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
-       assert self.ds.network_for_station("ARM-1") == "COSMOS"
+       with pytest.warns(DeprecationWarning):
+           assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
+           assert self.ds.network_for_station("ARM-1") == "COSMOS"

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure("soil_moisture"):
@@ -120,9 +124,13 @@ def test_read_metadata(self):
        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert all(meta == self.ds.read_metadata(1, format="pandas"))
        d2, m2 = self.ds.read([0, 1], return_meta=True)
-       assert np.all(d2[1]['soil_moisture'].dropna() ==
-                     data2['soil_moisture'].dropna())
-       assert np.all(m2[1].dropna() == meta.dropna())
+       pd.testing.assert_series_equal(
+           d2[1]['soil_moisture'].dropna(),
+           data2['soil_moisture'].dropna()
+       )
+       pd.testing.assert_series_equal(
+           m2[1].dropna(), meta.dropna(), check_names=False
+       )
        assert self.ds.read_metadata(1, format="dict") is not None
        assert self.ds.read_metadata([1], format="obj") is not None

@@ -231,9 +239,11 @@ def test_get_nearest_station(self):
            assert net.stations[station.name].lon == should_lon
            assert net.stations[station.name].lat == should_lat

-       station, dist = self.ds.find_nearest_station(
-           0, 0, return_distance=True, max_dist=100
-       )
+       with pytest.warns(UserWarning):
+           # expect a warning as no points are within max_dist
+           station, dist = self.ds.find_nearest_station(
+               0, 0, return_distance=True, max_dist=100
+           )
        assert station == dist == None

    def test_citation(self):
