diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml index c6c9da267..2c8ad2207 100644 --- a/.github/workflows/build_docs.yml +++ b/.github/workflows/build_docs.yml @@ -1,9 +1,12 @@ -name: Build documentation (don't publish) +name: Build documentation on: push: - pull_request: - branches: [ main ] + branches: + - main + pull_request: + workflow_dispatch: + jobs: build: @@ -23,9 +26,3 @@ jobs: - name: Mkdocs build run: mkdocs build - - - name: Upload documentation - uses: actions/upload-artifact@v2 - with: - name: html - path: site/ \ No newline at end of file diff --git a/docs/api/comparer.md b/docs/api/comparer.md index 4e0ea41bb..163d97d9c 100644 --- a/docs/api/comparer.md +++ b/docs/api/comparer.md @@ -1,26 +1,26 @@ # Comparer -The `Comparer` class is the main class of the ModelSkill package. It holds the *matched* observation and model data for a *single* observation and has methods for plotting and skill assessment. +The `Comparer` class is the main class of the ModelSkill package. It is returned by [`match()`](matching.md/#modelskill.match), [`from_matched()`](matching.md/#modelskill.from_matched) or as an element in a [`ComparerCollection`](comparercollection.md). It holds the *matched* observation and model data for a *single* observation and has methods for plotting and skill assessment. Main functionality: * selecting/filtering data - - `sel()` - - `query()` + - [`sel()`](#modelskill.Comparer.sel) + - [`query()`](#modelskill.Comparer.query) * skill assessment - - `skill()` - - `gridded_skill()` (for track observations) + - [`skill()`](#modelskill.Comparer.skill) + - [`gridded_skill()`](#modelskill.Comparer.gridded_skill) (for track observations) * plotting - - `plot.timeseries()` - - `plot.scatter()` - - `plot.kde()` - - `plot.qq()` - - `plot.hist()` - - `plot.box()` + - [`plot.timeseries()`](#modelskill.comparison._comparer_plotter.ComparerPlotter.timeseries) + - [`plot.scatter()`](#modelskill.comparison._comparer_plotter.ComparerPlotter.scatter) + - [`plot.kde()`](#modelskill.comparison._comparer_plotter.ComparerPlotter.kde) + - [`plot.qq()`](#modelskill.comparison._comparer_plotter.ComparerPlotter.qq) + - [`plot.hist()`](#modelskill.comparison._comparer_plotter.ComparerPlotter.hist) + - [`plot.box()`](#modelskill.comparison._comparer_plotter.ComparerPlotter.box) * load/save/export data - - `load()` - - `save()` - - `to_dataframe()` + - [`load()`](#modelskill.Comparer.load) + - [`save()`](#modelskill.Comparer.save) + - [`to_dataframe()`](#modelskill.Comparer.to_dataframe) diff --git a/docs/api/comparercollection.md b/docs/api/comparercollection.md index 0941b1aa3..2ec69dc5d 100644 --- a/docs/api/comparercollection.md +++ b/docs/api/comparercollection.md @@ -1,6 +1,25 @@ # ComparerCollection -The `ComparerCollection` is one of the main objects of the `modelskill` package. It is collection of `Comparer` objects and is returned by the `match()` method of the `Model` class. +The `ComparerCollection` is one of the main objects of the `modelskill` package. It is a collection of [`Comparer`](comparer.md) objects and created either by the [`match()`](matching.md/#modelskill.match) method, by passing a list of Comparers to the [`ComparerCollection`](comparercollection.md/#modelskill.ComparerCollection) constructor, or by reading a config file using the [`from_config()`](matching.md/#modelskill.from_config) function. 
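+
+For illustration, the first two construction routes could look like this (a minimal sketch; the observations, model result and Comparer objects are placeholders):
+
+```python
+import modelskill as ms
+
+cc = ms.match([o1, o2], mr)               # matching several observations returns a ComparerCollection
+cc = ms.ComparerCollection([cmp1, cmp2])  # or combine existing Comparer objects explicitly
+```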
+ +Main functionality: + +* selecting/filtering data + - `__get_item__()` - get a single Comparer, e.g., `cc[0]` or `cc['obs1']` + - [`sel()`](#modelskill.ComparerCollection.sel) + - [`query()`](#modelskill.ComparerCollection.query) +* skill assessment + - [`skill()`](#modelskill.ComparerCollection.skill) + - [`mean_skill()`](#modelskill.ComparerCollection.mean_skill) + - [`gridded_skill()`](#modelskill.ComparerCollection.gridded_skill) (for track observations) +* plotting + - [`plot.scatter()`](#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.scatter) + - [`plot.kde()`](#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.kde) + - [`plot.hist()`](#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.hist) +* load/save/export data + - [`load()`](#modelskill.ComparerCollection.load) + - [`save()`](#modelskill.ComparerCollection.save) + ::: modelskill.ComparerCollection diff --git a/docs/api/matching.md b/docs/api/matching.md index 12683eab2..0b8ef9cdf 100644 --- a/docs/api/matching.md +++ b/docs/api/matching.md @@ -3,8 +3,8 @@ A Comparer/ComparerCollection can be created in one of the following ways: * [`match()`](#modelskill.match) - match observations and model results -* [`from_matched()`](#modelskill.from_matched) - create a Comparer/ComparerCollection from matched data -* [`from_config()`](#modelskill.from_config) - create a Comparer/ComparerCollection from a config file +* [`from_matched()`](#modelskill.from_matched) - create a Comparer from matched data +* [`from_config()`](#modelskill.from_config) - create a ComparerCollection from a config file ::: modelskill.match diff --git a/docs/api/model/dummy.md b/docs/api/model/dummy.md new file mode 100644 index 000000000..3619e7ba0 --- /dev/null +++ b/docs/api/model/dummy.md @@ -0,0 +1,3 @@ +# DummyModelResult + +::: modelskill.DummyModelResult diff --git a/docs/api/model/index.md b/docs/api/model/index.md index b0ab434d1..333d80219 100644 --- a/docs/api/model/index.md +++ b/docs/api/model/index.md @@ -1,19 +1,13 @@ # Model Result -## Types of model results - A model result can either be a simple point/track, or spatial field (e.g. 2d dfsu file) from which data can be *extracted* at the observation positions by spatial interpolation. The following types are available: * Timeseries - - `PointModelResult` - a point result from a dfs0/nc file or a DataFrame - - `TrackModelResult` - a track (moving point) result from a dfs0/nc file or a DataFrame + - [`PointModelResult`](point.md) - a point result from a dfs0/nc file or a DataFrame + - [`TrackModelResult`](track.md) - a track (moving point) result from a dfs0/nc file or a DataFrame * SpatialField (extractable) - - `GridModelResult` - a spatial field from a dfs2/nc file or a Xarray Dataset - - `DfsuModelResult` - a spatial field from a dfsu file - -A model result can be created by explicitly invoking one of the above classes or using the `model_result()` function which will return the appropriate type based on the input data (if possible). - + - [`GridModelResult`](grid.md) - a spatial field from a dfs2/nc file or a Xarray Dataset + - [`DfsuModelResult`](dfsu.md) - a spatial field from a dfsu file -## model_result() +A model result can be created by explicitly invoking one of the above classes or using the [`model_result()`](model_result.md) function which will return the appropriate type based on the input data (if possible). 
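+
+As a minimal sketch (file and item names are placeholders), a dfsu-based model result can be created either way:
+
+```python
+import modelskill as ms
+
+mr = ms.DfsuModelResult("model.dfsu", item="Sign. Wave Height")
+# or let model_result() pick the appropriate class from the input
+mr = ms.model_result("model.dfsu", item="Sign. Wave Height")
+```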
-::: modelskill.model_result diff --git a/docs/api/model/model_result.md b/docs/api/model/model_result.md new file mode 100644 index 000000000..42996a8a6 --- /dev/null +++ b/docs/api/model/model_result.md @@ -0,0 +1,3 @@ +# model_result() + +::: modelskill.model_result diff --git a/docs/api/model/point.md b/docs/api/model/point.md index e9c9eb2bd..06a8843b0 100644 --- a/docs/api/model/point.md +++ b/docs/api/model/point.md @@ -1,3 +1,5 @@ # PointModelResult -::: modelskill.PointModelResult \ No newline at end of file +::: modelskill.PointModelResult + +::: modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter diff --git a/docs/api/model/track.md b/docs/api/model/track.md index 6d3f05043..443820275 100644 --- a/docs/api/model/track.md +++ b/docs/api/model/track.md @@ -1,3 +1,5 @@ # TrackModelResult -::: modelskill.TrackModelResult \ No newline at end of file +::: modelskill.TrackModelResult + +::: modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter diff --git a/docs/api/observation/index.md b/docs/api/observation/index.md index bb8ef65d1..9ae8391c8 100644 --- a/docs/api/observation/index.md +++ b/docs/api/observation/index.md @@ -2,12 +2,8 @@ ModelSkill supports two types of observations: -* `PointObservation` - a point timeseries from a dfs0/nc file or a DataFrame -* `TrackObservation` - a track (moving point) timeseries from a dfs0/nc file or a DataFrame +* [`PointObservation`](point.md) - a point timeseries from a dfs0/nc file or a DataFrame +* [`TrackObservation`](track.md) - a track (moving point) timeseries from a dfs0/nc file or a DataFrame -An observation can be created by explicitly invoking one of the above classes or using the `observation()` function which will return the appropriate type based on the input data (if possible). +An observation can be created by explicitly invoking one of the above classes or using the [`observation()`](observation.md) function which will return the appropriate type based on the input data (if possible). 
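+
+As a minimal sketch (file, item names and coordinates are placeholders), a point observation can be created either way:
+
+```python
+import modelskill as ms
+
+o = ms.PointObservation("obs.dfs0", item="waterlevel", x=4.24, y=52.69)
+# or let observation() pick the appropriate class from the input
+o = ms.observation("obs.dfs0", item="waterlevel")
+```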
- -## observation() - -::: modelskill.observation diff --git a/docs/api/observation/observation.md b/docs/api/observation/observation.md new file mode 100644 index 000000000..4072e4725 --- /dev/null +++ b/docs/api/observation/observation.md @@ -0,0 +1,3 @@ +# observation() + +::: modelskill.observation diff --git a/docs/api/observation/point.md b/docs/api/observation/point.md index 51f92143f..c67ee5419 100644 --- a/docs/api/observation/point.md +++ b/docs/api/observation/point.md @@ -1,3 +1,5 @@ # PointObservation -::: modelskill.PointObservation \ No newline at end of file +::: modelskill.PointObservation + +::: modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter diff --git a/docs/api/observation/track.md b/docs/api/observation/track.md index 77a4bc772..27a305eb4 100644 --- a/docs/api/observation/track.md +++ b/docs/api/observation/track.md @@ -1,3 +1,5 @@ # TrackObservation -::: modelskill.TrackObservation \ No newline at end of file +::: modelskill.TrackObservation + +::: modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter diff --git a/docs/images/dhi-white-logo.svg b/docs/images/dhi-white-logo.svg new file mode 100644 index 000000000..dc15bed4f --- /dev/null +++ b/docs/images/dhi-white-logo.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/docs/images/obs_timeseries.png b/docs/images/obs_timeseries.png new file mode 100644 index 000000000..a7beeb396 Binary files /dev/null and b/docs/images/obs_timeseries.png differ diff --git a/docs/images/plot_taylor.png b/docs/images/plot_taylor.png new file mode 100644 index 000000000..f59f93c47 Binary files /dev/null and b/docs/images/plot_taylor.png differ diff --git a/docs/images/spatial_overview.png b/docs/images/spatial_overview.png new file mode 100644 index 000000000..9b53e5c96 Binary files /dev/null and b/docs/images/spatial_overview.png differ diff --git a/docs/images/temporal_coverage.png b/docs/images/temporal_coverage.png new file mode 100644 index 000000000..3bc3e13db Binary files /dev/null and b/docs/images/temporal_coverage.png differ diff --git a/docs/images/wind_rose.png b/docs/images/wind_rose.png new file mode 100644 index 000000000..d4541f48d Binary files /dev/null and b/docs/images/wind_rose.png differ diff --git a/docs/index.md b/docs/index.md index a21db861e..637658376 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,53 +2,57 @@ # ModelSkill: Assess the skill of your MIKE model +![OS](https://img.shields.io/badge/OS-Windows%20%7C%20Linux-blue) ![Python version](https://img.shields.io/pypi/pyversions/modelskill.svg) ![PyPI version](https://badge.fury.io/py/modelskill.svg) + Compare results from MIKE simulations with observations. [ModelSkill](https://github.com/DHI/modelskill) would like to be your companion during the different phases of a MIKE modelling workflow. -## Installation +**Useful links**: +[Terminology](user-guide/terminology.md) | +[Overview](user-guide/overview.md) | +[Plotting](user-guide/plotting.md) | +[Issues](https://github.com/DHI/modelskill/issues) | +[Discussion](https://github.com/DHI/modelskill/discussions) + + +
+ +- :material-clock-fast:{ .lg .middle } __Set up in 5 minutes__ + + --- + + Install **ModelSkill** with [`pip`](https://pypi.org/project/modelskill/) and get up + and running in minutes -ModelSkill is available as open-source on PyPI and can be installed with pip: + [:octicons-arrow-right-24: Getting started](user-guide/getting-started.md) -```bash -$ pip install modelskill -``` +- :fontawesome-brands-python:{ .lg .middle } __It's just Python__ -ModelSkill is compatible with Python 3.8 and later versions on Windows and Linux. + --- + Focus on your modelling and less on generate a validation report -## Getting started + [:octicons-arrow-right-24: API Reference](api/index.md) -Are your observations and model results already matched? +- :fontawesome-solid-ruler:{ .lg .middle } __Made to measure__ -```python -import modelskill as ms -cmp = ms.from_matched("matched_data.dfs0", obs_item="obs_WL", mod_item="WL") -cmp.skill() -``` + --- -Or do you need to match the observations and results first? + Choose between different skill metrics and customizable tables and charts -```python -import modelskill as ms -o = ms.PointObservation("obs.dfs0", item="obs_WL") -mr = ms.PointModelResult("model.dfs0", item="WL") -cmp = ms.match(o, mr) -cmp.skill() -``` + [:octicons-arrow-right-24: Metrics](api/metrics.md) -Read more in the [Getting started guide](getting-started.md) or in the [overview](overview.md) of the package. +- :material-scale-balance:{ .lg .middle } __Open Source, MIT__ + --- -## Resources + ModelSkill is licensed under MIT and available on [GitHub](https://github.com/DHI/modelskill) -- [Documentation](https://dhi.github.io/modelskill/) (this site) -- [Getting started guide](getting-started.md) -- [Example notebooks](https://nbviewer.jupyter.org/github/DHI/modelskill/tree/main/notebooks/) -- [PyPI](https://pypi.org/project/modelskill/) -- [Source code](https://github.com/DHI/modelskill/) + [:octicons-arrow-right-24: License](license.md) +
diff --git a/docs/license.md b/docs/license.md new file mode 100644 index 000000000..166acc7ef --- /dev/null +++ b/docs/license.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 DHI + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/docs/terminology.md b/docs/terminology.md deleted file mode 100644 index 7b14844d0..000000000 --- a/docs/terminology.md +++ /dev/null @@ -1,68 +0,0 @@ -# Terminology - -`ModelSkill` is a library for assessing the skill of numerical models. It provides tools for comparing model results with observations, plotting the results and calculating validation metrics. This page defines some of the key terms used in the documentation. - - -## General terminology - -### Skill -**Skill** refers to the ability of a numerical model to accurately represent the real-world phenomenon it aims to simulate. It is a measure of how well the model performs in reproducing the observed system. Skill can be assessed using various metrics, such as accuracy, precision, and reliability, depending on the specific goals of the model and the nature of the data. In `ModelSkill`, [`skill`](api/skill.md) is also a specific method on [Comparer](#comparer) objects that returns a skill table with aggregated skill scores per observation and model for a list of selected [metrics](#metric). - - -### Validation -**Validation** is the process of assessing the model's performance by comparing its output to real-world observations or data collected from the system being modeled. It helps ensure that the model accurately represents the system it simulates. Validation is typically performed before the model is used for prediction or decision-making. - - -### Calibration -**Calibration** is the process of adjusting the model's parameters or settings to improve its performance. It involves fine-tuning the model to better match observed data. Calibration aims to reduce discrepancies between model predictions and actual measurements. At the end of the calibration process, the calibrated model should be validated with independent data. - - -### Performance -**Performance** is a measure of how well a numerical model operates in reproducing the observed system. It can be assessed using various metrics, such as accuracy, precision, and reliability, depending on the specific goals of the model and the nature of the data. In this context, **performance** is synonymous with **skill**. - - -### Timeseries -A **timeseries** is a sequence of data points in time. 
In `ModelSkill`, The data can either be from [observations](#observation) or [model results](#model-result). Timeseries can univariate or multivariate; ModelSkill primarily supports univariate timeseries. Multivariate timeseries can be assessed one variable at a time. Timeseries can also have different spatial dimensions, such as point, track, line, or area. - - -### Observation -An **observation** refers to real-world data or measurements collected from the system you are modeling. Observations serve as a reference for assessing the model's performance. These data points are used to compare with the model's predictions during validation and calibration. Observations are usually based on field measurements or laboratory experiments, but for the purposes of model validation, they can also be derived from other models (e.g. a reference model). `ModelSkill` supports [point](api/observation/point.md) and [track](api/observation/track.md) observation types. - - -### Measurement -A **measurement** is called [observation](#observation) in `ModelSkill`. - - -### Model result -A **model result** is the output of any type of numerical model. It is the data generated by the model during a simulation. Model results can be compared with observations to assess the model's performance. In the context of validation, the term "model result" is often used interchangeably with "model output" or "model prediction". `ModelSkill` supports [point](api/model/point.md), [track](api/model/track.md), [dfsu](api/model/dfsu.md) and [grid](api/model/grid.md) model result types. - - -### Metric -A **metric** is a quantitative measure (a mathematical expression) used to evaluate the performance of a numerical model. Metrics provide a standardized way to assess the model's accuracy, precision, and other attributes. A metric aggregates the skill of a model into a single number. See list of [metrics](api/metrics.md#modelskill.metrics) supported by `ModelSkill`. - - -### Score -A **score** is a numerical value that summarizes the model's performance based on chosen metrics. Scores can be used to rank or compare different models or model configurations. In the context of validation, the "skill score" or "validation score" often quantifies the model's overall performance. The score of a model is a single number, calculated as a weighted average for all time-steps, observations and variables. If you want to perform automated calibration, you can use the score as the objective function. In `ModelSkill`, [`score`](api/comparercollection.md/#modelskill.ComparerCollection.score) is also a specific method on [Comparer](#comparer) objects that returns a single number aggregated score using a specific [metric](#metric). - - -## ModelSkill-specific terminology - -### matched data -In ModelSkill, observations and model results are *matched* when they refer to the same positions in space and time. If the [observations](#observation) and [model results](#model-result) are already matched, the [`from_matched`](api/matching.md/#modelskill.from_matched) function can be used to create a [Comparer](#comparer) directly. Otherwise, the [compare](#compare) function can be used to match the observations and model results in space and time. - - -### match -The function [`match`](api/matching.md/#modelskill.match) is used to match a model result with observations. It returns a [`Comparer`](api/comparer.md) object or a [`ComparerCollection`](api/comparercollection.md) object. 
-
-
-### Comparer
-A [**Comparer**](api/comparer.md) is an object that compares a model result with observations. It is used to calculate validation metrics and generate plots. A Comparer can be created using the [`compare`](api/matching.md/#modelskill.match) function (will return a [ComparerCollection](api/comparercollection.md)).
-
-
-### ComparerCollection
-A [**ComparerCollection**](api/comparercollection.md) is a collection of Comparers. It is used to compare multiple model results with multiple observations. A ComparerCollection can be created using the [`compare`](api/matching.md/#modelskill.match) function.
-
-
-### Connector
-In past versions of FMSkill/ModelSkill, the Connector class was used to connect observations and model results. This class has been deprecated and is no longer in use.
-
diff --git a/docs/user-guide/data-structures.md b/docs/user-guide/data-structures.md
new file mode 100644
index 000000000..7f9001f22
--- /dev/null
+++ b/docs/user-guide/data-structures.md
@@ -0,0 +1,53 @@
+# Data Structures
+
+The main data structures in ModelSkill can be grouped into three categories:
+
+* **Primary** data (observations and model results)
+* **Comparer** objects
+* **Skill** objects
+
+All objects share some common principles:
+
+* The data container is accessible via the `data` attribute.
+* The data container is an `xarray` object (except for the `SkillTable` object, which is a `pandas` object).
+* The main data selection method is `sel`, which is a wrapper around `xarray.Dataset.sel`.
+* All plotting is accessible via the `plot` accessor of the object.
+
+
+## Observations and model results
+
+The primary data of ModelSkill are the data that need to be compared: observations and model results. The underlying data structures are very similar and can be grouped according to the spatial dimensionality (`gtype`) of the data:
+
+* `point`: 0D time series data
+* `track`: 0D time series data at moving locations (trajectories)
+* `grid`: gridded 2D data
+* `dfsu`: flexible mesh 2D data
+
+Point and track data are both `TimeSeries` objects, while grid and dfsu data are both `SpatialField` objects. `TimeSeries` objects are ready to be compared, whereas data from a `SpatialField` object needs to be *extracted* first (the extracted object will be of the `TimeSeries` type).
+
+A `TimeSeries` object contains its data in an `xarray.Dataset` with the actual data in the first DataArray and optional auxiliary data in the following DataArrays. The DataArrays have a `kind` attribute with either `observation` or `model`.
+
+
+## Comparer objects
+
+Comparer objects are the result of a matching procedure (between observations and model results) or are constructed directly from already matched data. A comparison of a *single* observation and one or more model results is stored in a `Comparer` object. A comparison of *multiple* observations and one or more model results is stored in a `ComparerCollection` object, which is a collection of `Comparer` objects.
+
+The matched data in a `Comparer` is stored in an `xarray.Dataset` which can be accessed via the `data` attribute. The Dataset has an attribute `gtype` which is a string describing the type of data (e.g. `point`, `track`). The first DataArray in the Dataset is the observation data, the next DataArrays are model result data and any additional DataArrays are auxiliary data. Each of the DataArrays has a `kind` attribute with either `observation`, `model` or `aux`.
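+
+For example, the structure described above can be inspected directly (assuming `cmp` is an existing `Comparer`):
+
+```python
+>>> ds = cmp.data                                    # the underlying xarray.Dataset
+>>> ds.attrs["gtype"]                                # e.g. "point" or "track"
+>>> {v: ds[v].attrs["kind"] for v in ds.data_vars}   # kind of each DataArray
+```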
+ +Both `Comparer` and `ComparerCollection` have a `plot` accessor for plotting the data (e.g. `cmp.plot.timeseries()` or `cmp.plot.scatter()`). + + + +## Skill objects + +Calling a skill method on a comparer object will return a skill object with skill scores (statistics) from comparing observation and model result data using different metrics (e.g. root mean square error). Two skill objects are currently implemented: `SkillTable` and `SkillGrid`. The first is relevant for all ModelSkill users while the latter is relevant for users of the track data (e.g. MetOcean studies using satellite altimetry data). + +If `c` is a comparer object, then the following skill methods are available: + +* `c.skill()` -> `SkillTable` +* `c.mean_skill()` -> `SkillTable` +* `c.gridded_skill()` -> `SkillGrid` + + +### SkillTable + diff --git a/docs/getting-started.md b/docs/user-guide/getting-started.md similarity index 67% rename from docs/getting-started.md rename to docs/user-guide/getting-started.md index e9c885514..30d2afd11 100644 --- a/docs/getting-started.md +++ b/docs/user-guide/getting-started.md @@ -8,34 +8,22 @@ results and observations. ## Workflow -The typical ModelSkill workflow consists of these five steps: +The typical ModelSkill workflow consists of these four steps: -1. Define **ModelResults** -2. Define **Observations** +1. Define **Observations** +2. Define **ModelResults** 3. **Match** observations and ModelResults in space and time 4. Do analysis, plotting, etc with a **Comparer** -### 1. Define ModelResults -The result of a simulation is stored in one or more result files, e.g. dfsu, dfs0, nc, csv. - -The name is used to identify the model result in the plots and tables. +### Define Observations -```python hl_lines="4" -import modelskill as ms -mr = ms.DfsuModelResult("SW/HKZN_local_2017_DutchCoast.dfsu", - item="Sign. Wave Height", - name='HKZN_local') -``` - -### 2. Define Observations - -The next step is to define the measurements to be used for the skill +The first step is to define the measurements to be used for the skill assessment. Two types of observation are available: -- [PointObservation](api/observation/point.md) -- [TrackObservation](api/observation/track.md) +- [PointObservation](../api/observation/point.md) +- [TrackObservation](../api/observation/track.md) Let's assume that we have one PointObservation and one TrackObservation (`name` is used to identify the observation, similar to the `name` of the model above). @@ -57,33 +45,45 @@ file, the item number (or item name) and a name. A PointObservation further needs to be initialized with it\'s x-, y-position. -### 3. Match observations and ModelResults +### Define ModelResults + +The result of a simulation is stored in one or more result files, e.g. dfsu, dfs0, nc, csv. + +The name is used to identify the model result in the plots and tables. + +```python hl_lines="4" +import modelskill as ms +mr = ms.DfsuModelResult("SW/HKZN_local_2017_DutchCoast.dfsu", + item="Sign. Wave Height", + name='HKZN_local') +``` + + + +### Match observations and ModelResults + +This [match()](../api/matching.md/#modelskill.match) method returns a [Comparer](../api/comparer.md#modelskill.Comparer) (a single observation) or a +[ComparerCollection](../api/comparercollection.md#modelskill.ComparerCollection) (multiple observations) +for further analysis and plotting. ```python cc = ms.match([hkna, c2], mr) ``` -This method returns a -[ComparerCollection](api/comparercollection.md#modelskill.ComparerCollection) -for further analysis and plotting. 
-### 4. Do analysis, plotting, etc with a Comparer +### Do analysis, plotting, etc with a Comparer The object returned by the `match()` method is a *Comparer*/*ComparerCollection*. It holds the matched observation and model data and has methods for plotting and skill assessment. The primary comparer methods are: -- [skill()](api/comparercollection.md#modelskill.ComparerCollection.skill) - which returns a table with the skill scores -- various plot methods of the comparer objects - * `plot.scatter()` - * `plot.timeseries()` - * `plot.kde()` - * `plot.qq()` - * `plot.hist()` - +- [skill()](../api/comparercollection.md#modelskill.ComparerCollection.skill) + which returns a [SkillTable](../api/skill.md) with the skill scores +- various [plot](../api/comparercollection.md/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter) methods of the comparer objects (e.g. `plot.scatter()`, `plot.timeseries()`) +- [sel()](../api/comparercollection.md/#modelskill.ComparerCollection.sel) method for selecting data + ### Save / load the ComparerCollection @@ -106,9 +106,9 @@ In order to select only a subset of the data for analysis, the comparer has a `s This method allow filtering of the data in several ways: -- on `observation` by specifying name or id of one or more +- on `observation` by specifying name or index of one or more observations -- on `model` (if more than one is compared) by giving name or id +- on `model` (if more than one is compared) by giving name or index - temporal using the `time` (or `start` and `end`) arguments - spatial using the `area` argument given as a bounding box or a polygon diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md new file mode 100644 index 000000000..6f7168df9 --- /dev/null +++ b/docs/user-guide/index.md @@ -0,0 +1,10 @@ +# User Guide + +ModelSkill compares model results with observations. The workflow can be split in two phases: + +1. [Matching](matching.md) - making sure that observations and model results are in the same space and time +2. Analysis - [plots](plotting.md) and [statistics](skill.md) of the matched data + +If the observations and model results are already matched (i.e. are stored in the same data source), +the `from_matched()` function can be used to go directly to the analysis phase. +If not, the `match()` function can be used to match the observations and model results in space and time. diff --git a/docs/user-guide/matching.md b/docs/user-guide/matching.md new file mode 100644 index 000000000..951bcc4df --- /dev/null +++ b/docs/user-guide/matching.md @@ -0,0 +1,108 @@ +# Matching + +Once observations and model results have been defined, the next step is to match them. This is done using the `match()` function which handles the allignment of the observation and model result data in space and time. Note that if the data is already matched, the `from_matched()` function can be used to create a `Comparer` directly from the matched data and the matching described here is not needed. + +The observation is considered the *truth* and the model result data is therefore interpolated to the observation data positions. + +The matching process will be different depending on the geometry of observation and model result: + +* Geometries are the *same* (e.g. both are point time series): only temporal matching is needed +* Geometries are *different* (e.g. observation is a point time series and model result is a grid): data is first spatially *extracted* from the model result and *then* matched in time. 
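+
+As noted above, matching can be skipped entirely when observation and model values are already stored together; a minimal sketch (file and item names are placeholders):
+
+```python
+>>> cmp = ms.from_matched("matched_data.dfs0", obs_item="obs_WL", mod_item="WL")
+>>> cmp.skill()
+```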
+ + +## Temporal matching + +Temporal matching is done by interpolating the model result data to the observation data time points; it is carried out after spatial matching when applicable. The interpolation is *linear* in time and done inside the `match()` function. + + +## Matching of time series + +If observation and model result are of the same geometry, the matching is done *one* observation at a time. Several model results can be matched to the same observation. The result of the matching process is a `Comparer` object which contains the matched data. + +In the most simple cases, one observation to one model result, the `match()` function can be used directly, without creating Observation and ModelResult objects first: + +```python +>>> cmp = ms.match('obs.dfs0', 'model.dfs0', obs_item='obs_WL', mod_item='WL') +``` + +In all other cases, the observations and model results needs to be defined first. + +```python +>>> o = ms.observation('obs.dfs0', item='waterlevel') +>>> mr1 = ms.model_result('model1.dfs0', item='WL1') +>>> mr2 = ms.model_result('model2.dfs0', item='WL2') +>>> cmp = ms.match(o, [mr1, mr2]) +``` + +In most cases, *several* observations needs to matched with several model results. This can be done by constructing a list of `Comparer` objects and then combining them into a `ComparerCollection`: + +```python +>>> cmps = [] +>>> for o in observations: +>>> mr1 = ... +>>> mr2 = ... +>>> cmps.append(ms.match(o, [mr1, mr2])) +>>> cc = ms.ComparerCollection(cmps) +``` + + + +## Matching with dfsu or grid model result + +If the model result is a SpatialField, i.e., either a `GridModelResult` or a `DfsuModelResult`, and the observation is of lower dimension (e.g. point), then the model result needs to be *extracted* before matching can be done. This can be done "offline" before using ModelSkill, e.g., using [MIKE](https://www.mikepoweredbydhi.com/) tools or [MIKE IO](https://github.com/DHI/mikeio), or as part of the matching process using ModelSkill. We will here focus on the latter. + +In this situation, *multiple* observations can be matched to the same model result, in which case the `match` function returns a `ComparerCollection` instead of a `Comparer` which is the returned object for single observation matching. + +```python +>>> o1 = ms.observation('obs1.dfs0', item='waterlevel') +>>> o2 = ms.observation('obs2.dfs0', item='waterlevel') +>>> mr = ms.model_result('model.dfsu', item='WaterLevel') +>>> cc = ms.match([o1, o2], mr) # returns a ComparerCollection +``` + +Matching `PointObservation` with `SpatialField` model results consists of two steps: + +1. Extracting data from the model result at the spatial position of the observation, which returns a PointModelResult +2. Matching the extracted data with the observation data in time + +Matching `TrackObservation` with `SpatialField` model results is for technical reasons handled in *one* step, i.e., the data is extracted in both space and time. + +The spatial matching method (selection or interpolation) can be specified using the `spatial_method` argument of the `match()` function. The default method depends on the type of observation and model result as specified in the sections below. 
+ + +### Extracting data from a DfsuModelResult + +Extracting data for a specific point position from the flexible mesh dfsu files can be done in several ways (specified by the `spatial_method` argument of the `match()` function): + +* Selection of the "contained" element +* Selection of the "nearest" element (often the same as the contained element, but not always) +* Interpolation with "inverse_distance" weighting (IDW) using the five nearest elements (default) + +The default (inverse_distance) is not necessarily the best method in all cases. When the extracted position is close to the model boundary, "contained" may be a better choice. + +```python +>>> cc = ms.match([o1, o2], mr_dfsu, spatial_method='contained') +``` + +Note that extraction of *track* data does not currently support the "contained" method. + +Note that the extraction of point data from 3D dfsu files is not yet fully supported. It is recommended to extract the data "offline" prior to using ModelSkill. + + +### Extracting data from a GridModelResult + +Extracting data from a GridModelResult is done through xarray's `interp()` function. The `spatial_method` argument of the `match()` function is passed on to the `interp()` function as the `method` argument. The default method is "linear" which is the recommended method for most cases. Close to land where the grid model result data is often missing, "nearest" may be a better choice. + +```python +>>> cc = ms.match([o1, o2], mr_netcdf, spatial_method='nearest') +``` + + +## Event-based matching and handling of gaps + +If the model result data contains gaps either because only events are stored or because of missing data, the `max_model_gap` argument of the `match()` function can be used to specify the maximum allowed gap (in seconds) in the model result data. This will avoid interpolating model data over long gaps in the model result data! + + +## Multiple model results with different temporal coverage + +If the model results have different temporal coverage, the `match()` function will only match the overlapping time period to ensure that the model results are comparable. The `Comparer` object will contain the matched data for the overlapping period only. 
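+
+Returning to the gap handling described above, a minimal sketch of limiting interpolation across model gaps (the chosen gap length is arbitrary):
+
+```python
+>>> cc = ms.match([o1, o2], mr, max_model_gap=3600)   # do not interpolate over gaps longer than 1 hour
+```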
diff --git a/docs/overview.md b/docs/user-guide/overview.md similarity index 100% rename from docs/overview.md rename to docs/user-guide/overview.md diff --git a/docs/user-guide/plotting.md b/docs/user-guide/plotting.md new file mode 100644 index 000000000..c3aed7314 --- /dev/null +++ b/docs/user-guide/plotting.md @@ -0,0 +1,78 @@ +# Plotting + +## Plotting observations and model results + +PointObservations and PointModelResults can be plotted using their `plot` accessor: + +```python +>>> o.plot.timeseries() +>>> mr.plot.timeseries() +>>> mr.plot.hist() +``` + +Only the observation time series is shown here: + +![Timeseries plot](../images/obs_timeseries.png) + + +## Plotting temporal coverage + +The temporal coverage of observations and model results can be plotted using the [`temporal_coverage`](../api/plotting.md/#modelskill.plotting.temporal_coverage) function in the [`plotting`](../api/plotting.md) module: + +```python +>>> o1 = ms.PointObservation('HKNA.dfs0', item=0, x=4.2420, y=52.6887) +>>> o2 = ms.PointObservation('EPL.dfs0', item=0, x=3.2760, y=51.9990) +>>> o3 = ms.TrackObservation("Alti_c2.dfs0", item=3) +>>> mr = ms.DfsuModelResult('HKZN_local.dfsu', item=0) +>>> ms.plotting.temporal_coverage(obs=[o1, o2, o3], mod=mr) +``` + +![Temporal coverage](../images/temporal_coverage.png) + + +## Plotting spatial overview + +The spatial coverage of observations and model results can be plotted using the [`spatial_overview`](../api/plotting.md/#modelskill.plotting.spatial_overview) function in the [`plotting`](../api/plotting.md) module: + +```python +>>> ms.plotting.spatial_overview([o1, o2, o3], mr) +``` + +![Spatial overview](../images/spatial_overview.png) + + +## Plotting compared data + +The `plot` accessor on a Comparer or ComparerCollection object can be used to plot the compared data: + +```python +>>> cmp.plot.timeseries() +>>> cc.plot.timeseries() +>>> cc.plot.scatter() +``` + + +## Plotting Taylor diagrams + +A Taylor diagram shows how well a model result matches an observation in terms of correlation, standard deviation and root mean square error. The `taylor` plot can be accessed through the Comparer [`plot`](../api/comparer.md/#modelskill.comparison._comparer_plotter.ComparerPlotter) accessor or the ComparerCollection [`plot`](../api/comparercollection.md/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter) accessor: + +```python +>>> cc = ms.match([o1, o2, o3], [mr_CMEMS, mr_ERA5, mr_MIKE21SW]) +>>> cc.plot.taylor() +``` + +![Taylor diagram](../images/plot_taylor.png) + +The radial distance from the point to the observation point is the standard deviation ratio, the angle is the correlation coefficient and the distance from the observation point to the model point is the root mean square error ratio. The closer the model point is to the observation point, the better the model result matches the observation. The closer the model point is to the origin, the better the model result matches the observation in terms of standard deviation and root mean square error. The closer the model point is to the horizontal axis, the better the model result matches the observation in terms of correlation. + + +## Plotting directional data (e.g. wind or currents) + +Directional data can be plotted using the [`wind_rose`](../api/plotting.md/#modelskill.plotting.wind_rose) function in the [`plotting`](../api/plotting.md) module. 
The function takes an array-like structure with speed and direction as columns (from one or two sources) and plots a wind rose: + +```python +>>> df = pd.read_csv('wind.csv', index_col=0, parse_dates=True) +>>> ms.plotting.wind_rose(df) +``` + +![Wind rose](../images/wind_rose.png) \ No newline at end of file diff --git a/docs/user-guide/selecting-data.md b/docs/user-guide/selecting-data.md new file mode 100644 index 000000000..bdd26ade3 --- /dev/null +++ b/docs/user-guide/selecting-data.md @@ -0,0 +1,37 @@ +# Selecting/filtering data + +The primary data filtering method of ModelSkill is the `sel()` method which is accesible on most ModelSkill data structures. The `sel()` method is a wrapper around `xarray.Dataset.sel()` and can be used to select data based on time, location and/or variable. The `sel()` method returns a new data structure of the same type with the selected data. + + +## TimeSeries data + +Point and track timeseries data of both observation and model result kinds are stored in `TimeSeries` objects which uses `xarray.Dataset` as data container. The `sel()` method can be used to select data based on time and returns a new `TimeSeries` object with the selected data. + +```python +>>> o = ms.observation('obs.nc', item='waterlevel') +>>> o_1month = o.sel(time=slice('2018-01-01', '2018-02-01')) +``` + + +## Comparer objects + +`Comparer` and `ComparerCollection` contain matched data from observations and model results. The `sel()` method can be used to select data based on time, model, quantity or other criteria and returns a new comparer object with the selected data. + +```python +>>> cmp = ms.match(o, [m1, m2]) +>>> cmp_1month = cmp.sel(time=slice('2018-01-01', '2018-02-01')) +>>> cmp_m1 = cmp.sel(model='m1') +``` + + + +## Skill objects + +The `skill()` and `mean_skill()` methods return a `SkillTable` object with skill scores from comparing observation and model result data using different metrics (e.g. root mean square error). The data of the `SkillTable` object is stored in a (MultiIndex) `pandas.DataFrame` which can be accessed via the `data` attribute. The `sel()` method can be used to select specific rows and returns a new `SkillTable` object with the selected data. + +```python +>>> sk = cmp.skill() +>>> sk_m1 = sk.sel(model='m1') +``` + + diff --git a/docs/user-guide/skill.md b/docs/user-guide/skill.md new file mode 100644 index 000000000..223ed1c37 --- /dev/null +++ b/docs/user-guide/skill.md @@ -0,0 +1,5 @@ +# Skill + +Matched data can be analysed statistically using the `skill()` function. The function returns a `Skill` object which contains the statistical results. The `Skill` object can be printed to the console or saved to a file using the `save()` function. + +```python \ No newline at end of file diff --git a/docs/user-guide/terminology.md b/docs/user-guide/terminology.md new file mode 100644 index 000000000..81db31c1a --- /dev/null +++ b/docs/user-guide/terminology.md @@ -0,0 +1,78 @@ +# Terminology + +ModelSkill is a library for assessing the skill of numerical models. It provides tools for comparing model results with observations, plotting the results and calculating validation metrics. This page defines some of the key terms used in the documentation. + + +## Skill +**Skill** refers to the ability of a numerical model to accurately represent the real-world phenomenon it aims to simulate. It is a measure of how well the model performs in reproducing the observed system. 
Skill can be assessed using various metrics, such as accuracy, precision, and reliability, depending on the specific goals of the model and the nature of the data. In ModelSkill, [`skill`](../api/comparer.md/#modelskill.Comparer.skill) is also a specific method on [Comparer](#comparer) objects that returns a [`SkillTable`](../api/skill.md) with aggregated skill scores per observation and model for a list of selected [metrics](#metric).
+
+
+## Validation
+**Validation** is the process of assessing the model's performance by comparing its output to real-world observations or data collected from the system being modeled. It helps ensure that the model accurately represents the system it simulates. Validation is typically performed before the model is used for prediction or decision-making.
+
+
+## Calibration
+**Calibration** is the process of adjusting the model's parameters or settings to improve its performance. It involves fine-tuning the model to better match observed data. Calibration aims to reduce discrepancies between model predictions and actual measurements. At the end of the calibration process, the calibrated model should be validated with independent data.
+
+
+## Performance
+**Performance** is a measure of how well a numerical model operates in reproducing the observed system. It can be assessed using various metrics, such as accuracy, precision, and reliability, depending on the specific goals of the model and the nature of the data. In this context, **performance** is synonymous with **skill**.
+
+
+## Timeseries
+A **timeseries** is a sequence of data points in time. In ModelSkill, the data can either be from [observations](#observation) or [model results](#model-result). Timeseries can be univariate or multivariate; ModelSkill primarily supports univariate timeseries. Multivariate timeseries can be assessed one variable at a time. Timeseries can also have different spatial dimensions, such as point, track, line, or area.
+
+
+## Observation
+An **observation** refers to real-world data or measurements collected from the system you are modeling. Observations serve as a reference for assessing the model's performance. These data points are used to compare with the model's predictions during validation and calibration. Observations are usually based on field measurements or laboratory experiments, but for the purposes of model validation, they can also be derived from other models (e.g. a reference model). ModelSkill supports [point](../api/observation/point.md) and [track](../api/observation/track.md) observation types.
+
+
+## Measurement
+A **measurement** is called [observation](#observation) in ModelSkill.
+
+
+## Model result
+A **model result** is the output of any type of numerical model. It is the data generated by the model during a simulation. Model results can be compared with observations to assess the model's performance. In the context of validation, the term "model result" is often used interchangeably with "model output" or "model prediction". ModelSkill supports [point](../api/model/point.md), [track](../api/model/track.md), [dfsu](../api/model/dfsu.md) and [grid](../api/model/grid.md) model result types.
+
+
+## Metric
+A **metric** is a quantitative measure (a mathematical expression) used to evaluate the performance of a numerical model. Metrics provide a standardized way to assess the model's accuracy, precision, and other attributes. A metric aggregates the skill of a model into a single number.
See list of [metrics](../api/metrics.md#modelskill.metrics) supported by ModelSkill. + + +## Score +A **score** is a numerical value that summarizes the model's performance based on chosen metrics. Scores can be used to rank or compare different models or model configurations. In the context of validation, the "skill score" or "validation score" often quantifies the model's overall performance. The score of a model is a single number, calculated as a weighted average for all time-steps, observations and variables. If you want to perform automated calibration, you can use the score as the objective function. In ModelSkill, [`score`](../api/comparercollection.md/#modelskill.ComparerCollection.score) is also a specific method on [Comparer](#comparer) objects that returns a single number aggregated score using a specific [metric](#metric). + + +## Matched data +In ModelSkill, observations and model results are *matched* when they refer to the same positions in space and time. If the [observations](#observation) and [model results](#model-result) are already matched, the [`from_matched`](../api/matching.md/#modelskill.from_matched) function can be used to create a [Comparer](#comparer) directly. Otherwise, the [match](#match) function can be used to match the observations and model results in space and time. + + +## match() +The function [`match`](../api/matching.md/#modelskill.match) is used to match a model result with observations. It returns a [`Comparer`](../api/comparer.md) object or a [`ComparerCollection`](../api/comparercollection.md) object. + + +## Comparer +A [**Comparer**](../api/comparer.md) is an object that stores the matched observation and model result data for a *single* observation. It is used to calculate validation metrics and generate plots. A Comparer can be created using the [`match`](../api/matching.md/#modelskill.match) function. + + +## ComparerCollection +A [**ComparerCollection**](../api/comparercollection.md) is a collection of [Comparer](#comparer)s. It is used to compare *multiple* observations with one or more model results. A ComparerCollection can be created using the [`match`](../api/matching.md/#modelskill.match) function or by passing a list of Comparers to the [`ComparerCollection`](../api/comparercollection.md/#modelskill.ComparerCollection) constructor. + + +## Connector +In past versions of FMSkill/ModelSkill, the Connector class was used to connect observations and model results. This class has been deprecated and is no longer in use. 
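+
+To make the distinction between *skill* and *score* above concrete, a minimal sketch (assuming `cc` is an existing `ComparerCollection`):
+
+```python
+>>> sk = cc.skill()    # SkillTable with aggregated metrics per observation and model
+>>> val = cc.score()   # a single aggregated number, e.g. for automated calibration
+```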
+ + +## Abbreviations + +| Abbreviation | Meaning | +| --- | --- | +| `ms` | ModelSkill | +| `o` or `obs` | Observation | +| `mr` or `mod` | Model result | +| `cmp` | `Comparer` | +| `cc` | `ComparerCollection` | +| `sk` | `SkillTable` | +| `mtr` | Metric | +| `q` | `Quantity` | + diff --git a/docs/vision.md b/docs/user-guide/vision.md similarity index 100% rename from docs/vision.md rename to docs/user-guide/vision.md diff --git a/mkdocs.yml b/mkdocs.yml index 4922bf8d5..5baed1644 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,9 +1,10 @@ -site_name: modelskill +site_name: ModelSkill documentation site_url: https://dhi.github.io/modelskill/ theme: name: material + logo: images/dhi-white-logo.svg features: - # - navigation.tabs + - navigation.tabs # - navigation.instant - navigation.expand - navigation.sections @@ -11,23 +12,30 @@ theme: # toc-depth: 1 nav: - - 'index.md' - - 'getting-started.md' - - 'overview.md' - - 'vision.md' - - 'terminology.md' - - API: + - User Guide: + - 'user-guide/index.md' + - 'user-guide/getting-started.md' + - 'user-guide/overview.md' + - 'user-guide/terminology.md' + - 'user-guide/data-structures.md' + - 'user-guide/matching.md' + - 'user-guide/plotting.md' + - 'user-guide/selecting-data.md' + - API Reference: - 'api/index.md' - Observation: - 'api/observation/index.md' + - observation(): 'api/observation/observation.md' - PointObservation: 'api/observation/point.md' - TrackObservation: 'api/observation/track.md' - Model Result: - 'api/model/index.md' + - model_result(): 'api/model/model_result.md' - PointModelResult: 'api/model/point.md' - TrackModelResult: 'api/model/track.md' - DfsuModelResult: 'api/model/dfsu.md' - - GridModelResult: 'api/model/grid.md' + - GridModelResult: 'api/model/grid.md' + - DummyModelResult: 'api/model/dummy.md' - 'api/matching.md' - 'api/comparer.md' - 'api/comparercollection.md' @@ -48,6 +56,11 @@ markdown_extensions: - pymdownx.inlinehilite - pymdownx.snippets - pymdownx.superfences + - attr_list + - md_in_html + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg extra_javascript: - javascripts/mathjax.js diff --git a/modelskill/__init__.py b/modelskill/__init__.py index 644121d0d..8e6efdb6b 100644 --- a/modelskill/__init__.py +++ b/modelskill/__init__.py @@ -19,7 +19,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "1.0.dev24" +__version__ = "1.1.dev0" if "64" not in architecture()[0]: raise Exception("This library has not been tested for a 32 bit system.") diff --git a/modelskill/comparison/_collection.py b/modelskill/comparison/_collection.py index 57586c917..c6d9057ea 100644 --- a/modelskill/comparison/_collection.py +++ b/modelskill/comparison/_collection.py @@ -1,4 +1,5 @@ from __future__ import annotations +from copy import deepcopy import os from pathlib import Path import tempfile @@ -92,7 +93,15 @@ class ComparerCollection(Mapping, Scoreable): def __init__(self, comparers: Iterable[Comparer]) -> None: self._comparers: Dict[str, Comparer] = {} - self._insert_comparers(comparers) + + for cmp in comparers: + if cmp.name in self._comparers: + # comparer with this name already exists! 
+ # maybe the user is trying to add a new model + # or a new time period + self._comparers[cmp.name] += cmp + else: + self._comparers[cmp.name] = cmp self.plot = ComparerCollection.plotter(self) """Plot using the ComparerCollectionPlotter @@ -105,15 +114,6 @@ def __init__(self, comparers: Iterable[Comparer]) -> None: >>> cc.plot.hist() """ - def _insert_comparers(self, comparer: Union[Comparer, Iterable[Comparer]]) -> None: - if isinstance(comparer, Iterable): - for c in comparer: - self[c.name] = c - elif isinstance(comparer, Comparer): - self[comparer.name] = comparer - else: - pass - @property def _name(self) -> str: return "Observations" @@ -215,9 +215,10 @@ def n_quantities(self) -> int: def __repr__(self) -> str: out = [] - out.append(f"<{type(self).__name__}>") - for key, value in self._comparers.items(): - out.append(f"{type(value).__name__}: {key}") + out.append("") + out.append("Comparers:") + for index, (key, value) in enumerate(self._comparers.items()): + out.append(f"{index}: {key} - {value.quantity}") return str.join("\n", out) def rename(self, mapping: Dict[str, str]) -> "ComparerCollection": @@ -252,12 +253,10 @@ def rename(self, mapping: Dict[str, str]) -> "ComparerCollection": return ComparerCollection(cmps) @overload - def __getitem__(self, x: slice | Iterable[Hashable]) -> ComparerCollection: - ... + def __getitem__(self, x: slice | Iterable[Hashable]) -> ComparerCollection: ... @overload - def __getitem__(self, x: int | Hashable) -> Comparer: - ... + def __getitem__(self, x: int | Hashable) -> Comparer: ... def __getitem__( self, x: int | Hashable | slice | Iterable[Hashable] @@ -279,33 +278,14 @@ def __getitem__( raise TypeError(f"Invalid type for __getitem__: {type(x)}") - def __setitem__(self, x: str, value: Comparer) -> None: - assert isinstance( - value, Comparer - ), f"comparer must be a Comparer, not {type(value)}" - if x in self._comparers: - # comparer with this name already exists! - # maybe the user is trying to add a new model - # or a new time period - self._comparers[x] = self._comparers[x] + value # type: ignore - else: - self._comparers[x] = value - def __len__(self) -> int: return len(self._comparers) def __iter__(self) -> Iterator[Comparer]: return iter(self._comparers.values()) - def __copy__(self) -> "ComparerCollection": - cls = self.__class__ - cp = cls.__new__(cls) - # TODO should this use deepcopy? 
- cp.__init__(list(self._comparers)) # type: ignore - return cp - def copy(self) -> "ComparerCollection": - return self.__copy__() + return deepcopy(self) def __add__( self, other: Union["Comparer", "ComparerCollection"] diff --git a/modelskill/comparison/_collection_plotter.py b/modelskill/comparison/_collection_plotter.py index 50c59fb42..53cde0a6e 100644 --- a/modelskill/comparison/_collection_plotter.py +++ b/modelskill/comparison/_collection_plotter.py @@ -20,6 +20,16 @@ def _default_univarate_title(kind: str, cc: ComparerCollection) -> str: class ComparerCollectionPlotter: + """Plotter for ComparerCollection + + Examples + -------- + >>> cc.plot.scatter() + >>> cc.plot.hist() + >>> cc.plot.kde() + >>> cc.plot.taylor() + """ + def __init__(self, cc: ComparerCollection) -> None: self.cc = cc self.is_directional = False diff --git a/modelskill/comparison/_comparer_plotter.py b/modelskill/comparison/_comparer_plotter.py index c28ac977f..398a74610 100644 --- a/modelskill/comparison/_comparer_plotter.py +++ b/modelskill/comparison/_comparer_plotter.py @@ -32,7 +32,17 @@ class ComparerPlotter: - """Plotter class for Comparer""" + """Plotter class for Comparer + + Examples + -------- + >>> cmp.plot.scatter() + >>> cmp.plot.timeseries() + >>> cmp.plot.hist() + >>> cmp.plot.kde() + >>> cmp.plot.qq() + >>> cmp.plot.box() + """ def __init__(self, comparer: Comparer) -> None: self.comparer = comparer diff --git a/modelskill/comparison/_comparison.py b/modelskill/comparison/_comparison.py index 471f04b2d..7510f2ac6 100644 --- a/modelskill/comparison/_comparison.py +++ b/modelskill/comparison/_comparison.py @@ -236,6 +236,8 @@ class ItemSelection: obs: str model: Sequence[str] aux: Sequence[str] + x: Optional[str] = None + y: Optional[str] = None def __post_init__(self) -> None: # check that obs, model and aux are unique, and that they are not overlapping @@ -245,14 +247,21 @@ def __post_init__(self) -> None: @property def all(self) -> Sequence[str]: - return [self.obs] + list(self.model) + list(self.aux) + res = [self.obs] + list(self.model) + list(self.aux) + if self.x is not None: + res.append(self.x) + if self.y is not None: + res.append(self.y) + return res @staticmethod def parse( - items: List[str], + items: Sequence[str], obs_item: str | int | None = None, mod_items: Optional[Iterable[str | int]] = None, aux_items: Optional[Iterable[str | int]] = None, + x_item: str | int | None = None, + y_item: str | int | None = None, ) -> ItemSelection: """Parse items and return observation, model and auxiliary items Default behaviour: @@ -263,27 +272,25 @@ def parse( Both integer and str are accepted as items. If str, it must be a key in data. 
""" assert len(items) > 1, "data must contain at least two items" - if obs_item is None: - obs_name: str = items[0] - else: - obs_name = _get_name(obs_item, items) + obs_name = _get_name(obs_item, items) if obs_item else items[0] # Check existance of items and convert to names - if mod_items is not None: - if isinstance(mod_items, (str, int)): - mod_items = [mod_items] - mod_names = [_get_name(m, items) for m in mod_items] + if aux_items is not None: if isinstance(aux_items, (str, int)): aux_items = [aux_items] aux_names = [_get_name(a, items) for a in aux_items] else: aux_names = [] + if mod_items is not None: + if isinstance(mod_items, (str, int)): + mod_items = [mod_items] + mod_names = [_get_name(m, items) for m in mod_items] + else: + mod_names = list(set(items) - set(aux_names) - set([obs_name])) - items.remove(obs_name) - - if mod_items is None: - mod_names = list(set(items) - set(aux_names)) + x_name = _get_name(x_item, items) if x_item is not None else None + y_name = _get_name(y_item, items) if y_item is not None else None assert len(mod_names) > 0, "no model items were found! Must be at least one" assert obs_name not in mod_names, "observation item must not be a model item" @@ -292,7 +299,9 @@ def parse( ), "observation item must not be an auxiliary item" assert isinstance(obs_name, str), "observation item must be a string" - return ItemSelection(obs=obs_name, model=mod_names, aux=aux_names) + return ItemSelection( + obs=obs_name, model=mod_names, aux=aux_names, x=x_name, y=y_name + ) def _area_is_bbox(area: Any) -> bool: @@ -349,12 +358,14 @@ def _matched_data_to_xarray( x: Optional[float] = None, y: Optional[float] = None, z: Optional[float] = None, + x_item: str | int | None = None, + y_item: str | int | None = None, quantity: Optional[Quantity] = None, ) -> xr.Dataset: """Convert matched data to accepted xarray.Dataset format""" assert isinstance(df, pd.DataFrame) cols = list(df.columns) - items = ItemSelection.parse(cols, obs_item, mod_items, aux_items) + items = ItemSelection.parse(cols, obs_item, mod_items, aux_items, x_item, y_item) # check that items.obs and items.model are numeric if not np.issubdtype(df[items.obs].dtype, np.number): @@ -380,17 +391,30 @@ def _matched_data_to_xarray( for a in items.aux: ds[a].attrs["kind"] = "auxiliary" - if x is not None: + if x_item is not None: + ds = ds.rename({items.x: "x"}).set_coords("x") + elif x is not None: ds.coords["x"] = x - if y is not None: + else: + ds.coords["x"] = np.nan + + if y_item is not None: + ds = ds.rename({items.y: "y"}).set_coords("y") + elif y is not None: ds.coords["y"] = y + else: + ds.coords["y"] = np.nan + + # No z-item so far (relevant for ProfileObservation) if z is not None: ds.coords["z"] = z - if x is None or np.isscalar(x): + if np.isscalar(ds.coords["x"]): ds.attrs["gtype"] = str(GeometryType.POINT) else: ds.attrs["gtype"] = str(GeometryType.TRACK) + # TODO + # ds.attrs["gtype"] = str(GeometryType.PROFILE) if quantity is None: q = Quantity.undefined() @@ -493,6 +517,8 @@ def from_matched_data( x: Optional[float] = None, y: Optional[float] = None, z: Optional[float] = None, + x_item: str | int | None = None, + y_item: str | int | None = None, quantity: Optional[Quantity] = None, ) -> "Comparer": """Initialize from compared data""" @@ -507,6 +533,8 @@ def from_matched_data( x=x, y=y, z=z, + x_item=x_item, + y_item=y_item, quantity=quantity, ) data.attrs["weight"] = weight @@ -514,12 +542,13 @@ def from_matched_data( def __repr__(self): out = [ - f"<{type(self).__name__}>", + "", f"Quantity: 
{self.quantity}", f"Observation: {self.name}, n_points={self.n_points}", + "Model(s):", ] - for model in self.mod_names: - out.append(f" Model: {model}, rmse={self.score()[model]:.3f}") + for index, model in enumerate(self.mod_names): + out.append(f"{index}: {model}") for var in self.aux_names: out.append(f" Auxiliary: {var}") @@ -751,6 +780,24 @@ def _to_observation(self) -> PointObservation | TrackObservation: else: raise NotImplementedError(f"Unknown gtype: {self.gtype}") + def __iadd__(self, other: Comparer): # type: ignore + from ..matching import match_space_time + + missing_models = set(self.mod_names) - set(other.mod_names) + if len(missing_models) == 0: + # same obs name and same model names + self.data = xr.concat([self.data, other.data], dim="time").drop_duplicates( + "time" + ) + else: + self.raw_mod_data.update(other.raw_mod_data) + matched = match_space_time( + observation=self._to_observation(), raw_mod_data=self.raw_mod_data # type: ignore + ) + self.data = matched + + return self + def __add__( self, other: Union["Comparer", "ComparerCollection"] ) -> "ComparerCollection" | "Comparer": @@ -766,12 +813,9 @@ def __add__( if len(missing_models) == 0: # same obs name and same model names cmp = self.copy() - cmp.data = xr.concat([cmp.data, other.data], dim="time") - # cc.data = cc.data[ - # ~cc.data.time.to_index().duplicated(keep="last") - # ] # 'first' - _, index = np.unique(cmp.data["time"], return_index=True) - cmp.data = cmp.data.isel(time=index) + cmp.data = xr.concat( + [cmp.data, other.data], dim="time" + ).drop_duplicates("time") else: raw_mod_data = self.raw_mod_data.copy() diff --git a/modelskill/matching.py b/modelskill/matching.py index d5106be0a..affb03fde 100644 --- a/modelskill/matching.py +++ b/modelskill/matching.py @@ -82,6 +82,8 @@ def from_matched( x: Optional[float] = None, y: Optional[float] = None, z: Optional[float] = None, + x_item: str | int | None = None, + y_item: str | int | None = None, ) -> Comparer: """Create a Comparer from observation and model results that are already matched (aligned) Parameters @@ -105,6 +107,10 @@ def from_matched( y-coordinate of observation, by default None z : float, optional z-coordinate of observation, by default None + x_item: [str, int], optional, + Name of x item, only relevant for track data + y_item: [str, int], optional + Name of y item, only relevant for track data Examples -------- @@ -148,6 +154,8 @@ def from_matched( x=x, y=y, z=z, + x_item=x_item, + y_item=y_item, quantity=quantity, ) diff --git a/modelskill/model/dfsu.py b/modelskill/model/dfsu.py index 5d046736b..37919488d 100644 --- a/modelskill/model/dfsu.py +++ b/modelskill/model/dfsu.py @@ -92,8 +92,13 @@ def __init__( self.filename = filename # TODO: remove? 
backward compatibility def __repr__(self) -> str: - # TODO add item name - return f"<{self.__class__.__name__}> '{self.name}'" + res = [] + res.append(f"<{self.__class__.__name__}>: {self.name}") + res.append(f"Time: {self.time[0]} - {self.time[-1]}") + res.append(f"Quantity: {self.quantity}") + if len(self.sel_items.aux) > 0: + res.append(f"Auxiliary variables: {', '.join(self.sel_items.aux)}") + return "\n".join(res) @property def time(self) -> pd.DatetimeIndex: diff --git a/modelskill/model/factory.py b/modelskill/model/factory.py index 6cea9afd2..e4bbdbdd1 100644 --- a/modelskill/model/factory.py +++ b/modelskill/model/factory.py @@ -25,6 +25,7 @@ def model_result( data: DataInputType, *, + aux_items: Optional[list[int | str]] = None, gtype: Optional[Literal["point", "track", "unstructured", "grid"]] = None, **kwargs: Any, ) -> Any: @@ -34,6 +35,8 @@ def model_result( ---------- data : DataInputType The data to be used for creating the ModelResult object. + aux_items : Optional[list[int | str]] + Auxiliary items, by default None gtype : Optional[Literal["point", "track", "unstructured", "grid"]] The geometry type of the data. If not specified, it will be guessed from the data. **kwargs @@ -54,6 +57,7 @@ def model_result( return _modelresult_lookup[geometry]( data=data, + aux_items=aux_items, **kwargs, ) diff --git a/modelskill/model/grid.py b/modelskill/model/grid.py index 2d5d2b755..a7821de56 100644 --- a/modelskill/model/grid.py +++ b/modelskill/model/grid.py @@ -92,8 +92,13 @@ def __init__( self.quantity = quantity def __repr__(self) -> str: - # TODO add item name - return f" '{self.name}'" + res = [] + res.append(f"<{self.__class__.__name__}>: {self.name}") + res.append(f"Time: {self.time[0]} - {self.time[-1]}") + res.append(f"Quantity: {self.quantity}") + if len(self.sel_items.aux) > 0: + res.append(f"Auxiliary variables: {', '.join(self.sel_items.aux)}") + return "\n".join(res) @property def time(self) -> pd.DatetimeIndex: diff --git a/modelskill/model/point.py b/modelskill/model/point.py index c681e07e0..0e14e5ee7 100644 --- a/modelskill/model/point.py +++ b/modelskill/model/point.py @@ -84,13 +84,16 @@ def interp_time(self, observation: Observation, **kwargs: Any) -> PointModelResu """ Interpolate model result to the time of the observation + wrapper around xarray.Dataset.interp() + Parameters ---------- observation : Observation The observation to interpolate to **kwargs - Additional keyword arguments passed to xarray.interp + Additional keyword arguments passed to xarray.interp + Returns ------- PointModelResult diff --git a/modelskill/obs.py b/modelskill/obs.py index e8ba1f6fe..a7361a154 100644 --- a/modelskill/obs.py +++ b/modelskill/obs.py @@ -6,6 +6,7 @@ -------- >>> o1 = PointObservation("klagshamn.dfs0", item=0, x=366844, y=6154291, name="Klagshamn") """ + from __future__ import annotations from typing import Literal, Optional, Any, Union @@ -142,10 +143,6 @@ def _parse_time(time): else: return time # can be RangeIndex - @property - def _aux_vars(self): - return list(self.data.filter_by_attrs(kind="aux").data_vars) - class PointObservation(Observation): """Class for observations of fixed locations @@ -224,14 +221,6 @@ def z(self): def z(self, value): self.data["z"] = value - def __repr__(self): - out = f"PointObservation: {self.name}, x={self.x}, y={self.y}" - if self.z is not None: - out += f", z={self.z}" - if len(self._aux_vars) > 0: - out += f", aux={self._aux_vars}" - return out - class TrackObservation(Observation): """Class for observation with locations moving 
in space, e.g. satellite altimetry @@ -346,13 +335,7 @@ def __init__( aux_items=aux_items, ) assert isinstance(data, xr.Dataset) - super().__init__(data=data, weight=weight,attrs=attrs) - - def __repr__(self): - out = f"TrackObservation: {self.name}, n={self.n_points}" - if len(self._aux_vars) > 0: - out += f", aux={self._aux_vars}" - return out + super().__init__(data=data, weight=weight, attrs=attrs) def unit_display_name(name: str) -> str: diff --git a/modelskill/plotting/_taylor_diagram.py b/modelskill/plotting/_taylor_diagram.py index 154d348f3..e12507a13 100644 --- a/modelskill/plotting/_taylor_diagram.py +++ b/modelskill/plotting/_taylor_diagram.py @@ -85,4 +85,6 @@ def taylor_diagram( ) fig.suptitle(title, size="x-large") + # prevent the plot from being displayed, since it is also displayed by the returned object + plt.close() return fig diff --git a/modelskill/settings.py b/modelskill/settings.py index c595744ae..7a2ee707e 100644 --- a/modelskill/settings.py +++ b/modelskill/settings.py @@ -2,10 +2,10 @@ The settings module holds package-wide configurables and provides a uniform API for working with them. -This module is heavily inspired by [pandas config module](https://github.com/pandas-dev/pandas/tree/main/pandas/_config) +This module is inspired by [pandas config module](https://github.com/pandas-dev/pandas/tree/main/pandas/_config). Overview -======== +-------- This module supports the following requirements: - options are referenced using keys in dot.notation, e.g. "x.y.option - z". @@ -21,14 +21,48 @@ - a developer can register an option. Implementation -============== +-------------- - Data is stored using nested dictionaries, and should be accessed through the provided API. - "Registered options" have metadata associated with them, which are stored in auxiliary dictionaries keyed on the fully-qualified key, e.g. "x.y.z.option". +Examples +-------- +>>> import modelskill as ms +>>> ms.options +metrics.list : [, (...)] +plot.rcparams : {} +plot.scatter.legend.bbox : {'facecolor': 'white', (...)} +plot.scatter.legend.fontsize : 12 +plot.scatter.legend.kwargs : {} +plot.scatter.oneone_line.color : blue +plot.scatter.oneone_line.label : 1:1 +plot.scatter.points.alpha : 0.5 +plot.scatter.points.label : +plot.scatter.points.size : 20 +plot.scatter.quantiles.color : darkturquoise +plot.scatter.quantiles.kwargs : {} +plot.scatter.quantiles.label : Q-Q +plot.scatter.quantiles.marker : X +plot.scatter.quantiles.markeredgecolor : (0, 0, 0, 0.4) +plot.scatter.quantiles.markeredgewidth : 0.5 +plot.scatter.quantiles.markersize : 3.5 +plot.scatter.reg_line.kwargs : {'color': 'r'} +>>> ms.set_option("plot.scatter.points.size", 4) +>>> plot.scatter.points.size +4 +>>> ms.get_option("plot.scatter.points.size") +4 +>>> ms.options.plot.scatter.points.size = 10 +>>> ms.options.plot.scatter.points.size +10 +>>> ms.reset_option("plot.scatter.points.size") +>>> ms.options.plot.scatter.points.size +20 """ + import yaml from pathlib import Path import re @@ -63,7 +97,6 @@ class RegisteredOption(NamedTuple): class OptionError(AttributeError, KeyError): - "Error in options handling, e.g. 
unknown option" pass @@ -80,7 +113,7 @@ def _get_single_key(pat: str) -> str: return key -def _get_option(pat: str) -> Any: +def get_option(pat: str) -> Any: """Get value of a single option matching a pattern Parameters @@ -100,13 +133,14 @@ def _get_option(pat: str) -> Any: return root[k] -def _set_option(*args, **kwargs) -> None: +def set_option(*args, **kwargs) -> None: """Set the value of one or more options Examples -------- - >>> modelskill.set_option("plot.scatter.point_size", 4) - >>> modelskill.set_option({"plot.scatter.point_size": 4}) + >>> ms.set_option("plot.scatter.points.size", 4) + >>> ms.set_option({"plot.scatter.points.size": 4}) + >>> ms.options.plot.scatter.points.size = 4 """ # must at least 1 arg deal with constraints later @@ -140,7 +174,7 @@ def _option_to_dict(pat: str = "") -> Dict: keys = _select_options(pat) d = dict() for k in keys: - d[k] = _get_option(k) + d[k] = get_option(k) return d @@ -149,7 +183,7 @@ def _describe_option_short(pat: str = "", _print_desc: bool = True) -> Optional[ if len(keys) == 0: raise OptionError("No such keys(s)") - s = "\n".join([f"{k} : {_get_option(k)}" for k in keys]) + s = "\n".join([f"{k} : {get_option(k)}" for k in keys]) if _print_desc: print(s) @@ -170,8 +204,21 @@ def _describe_option(pat: str = "", _print_desc: bool = True) -> Optional[str]: return s -def _reset_option(pat: str = "", silent: bool = False) -> None: - """Reset one or more options (matching a pattern) to the default value""" +def reset_option(pat: str = "", silent: bool = False) -> None: + """Reset one or more options (matching a pattern) to the default value + + Examples + -------- + >>> ms.options.plot.scatter.points.size + 20 + >>> ms.options.plot.scatter.points.size = 10 + >>> ms.options.plot.scatter.points.size + 10 + >>> ms.reset_option("plot.scatter.points.size") + >>> ms.options.plot.scatter.points.size + 20 + + """ keys = _select_options(pat) @@ -186,11 +233,14 @@ def _reset_option(pat: str = "", silent: bool = False) -> None: ) for k in keys: - _set_option(k, _registered_options[k].defval, silent=silent) + set_option(k, _registered_options[k].defval, silent=silent) class OptionsContainer: - """provide attribute-style access to a nested dict""" + """provide attribute-style access to a nested dict of options + + Accessed by ms.options + """ def __init__(self, d: Dict[str, Any], prefix: str = "") -> None: object.__setattr__(self, "d", d) @@ -204,7 +254,7 @@ def __setattr__(self, key: str, val: Any) -> None: # you can't set new keys # can you can't overwrite subtrees if key in self.d and not isinstance(self.d[key], dict): - _set_option(prefix, val) + set_option(prefix, val) else: raise OptionError("You can only set the value of existing options") @@ -220,7 +270,7 @@ def __getattr__(self, key: str): if isinstance(v, dict): return OptionsContainer(v, prefix) else: - return _get_option(prefix) + return get_option(prefix) def to_dict(self) -> Dict: """Return options as dictionary with full-name keys""" @@ -285,15 +335,15 @@ def _build_option_description(k: str) -> str: s += "No description available." 
if o: - s += f"\n [default: {o.defval}] [currently: {_get_option(k)}]" + s += f"\n [default: {o.defval}] [currently: {get_option(k)}]" return s # temporary disabled -get_option = _get_option -set_option = _set_option -reset_option = _reset_option +# get_option = _get_option +# set_option = _set_option +# reset_option = _reset_option # describe_option = _describe_option options = OptionsContainer(_global_settings) diff --git a/modelskill/skill.py b/modelskill/skill.py index 419807f8b..37442f6be 100644 --- a/modelskill/skill.py +++ b/modelskill/skill.py @@ -312,7 +312,17 @@ class SkillArray: def __init__(self, data: pd.DataFrame) -> None: self.data = data self._ser = data.iloc[:, -1] # last column is the metric + self.plot = SkillArrayPlotter(self) + """Plot using the SkillArrayPlotter + + Examples + -------- + >>> sk.rmse.plot.line() + >>> sk.rmse.plot.bar() + >>> sk.rmse.plot.barh() + >>> sk.rmse.plot.grid() + """ def to_dataframe(self, drop_xy: bool = True) -> pd.DataFrame: """Convert SkillArray to pd.DataFrame diff --git a/modelskill/timeseries/_timeseries.py b/modelskill/timeseries/_timeseries.py index 0db1f9130..9586fd064 100644 --- a/modelskill/timeseries/_timeseries.py +++ b/modelskill/timeseries/_timeseries.py @@ -126,7 +126,15 @@ class TimeSeries: def __init__(self, data: xr.Dataset) -> None: self.data = data if self._is_input_validated(data) else _validate_dataset(data) + self.plot: TimeSeriesPlotter = TimeSeries.plotter(self) + """Plot using the ComparerPlotter + + Examples + -------- + >>> obj.plot.timeseries() + >>> obj.plot.hist() + """ def _is_input_validated(self, data: Any) -> bool: """Check if data is already a valid TimeSeries (contains the modelskill_version attribute)""" @@ -224,8 +232,20 @@ def _values_as_series(self) -> pd.Series: """Values to series (for plotting)""" return self.data[self.name].to_series() + @property + def _aux_vars(self): + return list(self.data.filter_by_attrs(kind="aux").data_vars) + def __repr__(self) -> str: - return f"<{self.__class__.__name__}> '{self.name}' (n_points: {self.n_points})" + res = [] + res.append(f"<{self.__class__.__name__}>: {self.name}") + if self.gtype == str(GeometryType.POINT): + res.append(f"Location: {self.x}, {self.y}") + res.append(f"Time: {self.time[0]} - {self.time[-1]}") + res.append(f"Quantity: {self.quantity}") + if len(self._aux_vars) > 0: + res.append(f"Auxiliary variables: {', '.join(self._aux_vars)}") + return "\n".join(res) # len() of a DataFrame returns the number of rows, # len() of xr.Dataset returns the number of variables diff --git a/pyproject.toml b/pyproject.toml index f1c02cb6f..7cb2370ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ exclude = ["notebooks", "tests"] [project] name="modelskill" -version="1.0.dev24" +version="1.1.dev0" dependencies = [ "numpy >= 1.20.0", "pandas >= 1.4", @@ -28,8 +28,7 @@ readme = "README.md" requires-python = ">=3.8" classifiers = [ "License :: OSI Approved :: MIT License", - "Development Status :: 4 - Beta", # TODO: change to stable - #"Development Status :: 5 - Production/Stable", + "Development Status :: 5 - Production/Stable", "Intended Audience :: Science/Research", "Programming Language :: Python", "Programming Language :: Python :: 3", @@ -44,7 +43,7 @@ classifiers = [ [project.optional-dependencies] dev = ["pytest", "mkdocs==1.5.3", - "mkdocs-material==9.4.14", + "mkdocs-material==9.5.6", "mkdocstrings==0.24.0", "mkdocstrings-python==1.7.5", "black==22.3.0", diff --git a/tests/test_aggregated_skill.py b/tests/test_aggregated_skill.py index 
dfcafa9f9..0e6433d2c 100644 --- a/tests/test_aggregated_skill.py +++ b/tests/test_aggregated_skill.py @@ -87,17 +87,14 @@ def test_skill_table(sk_df1): assert sk.metrics == ["n", "bias", "rmse", "corr", "si", "r2"] -def test_skill_table_odd_index(sk_df2): - # having a different index name works - sk_df2.index.name = "odd" - sk = ms.SkillTable(sk_df2) - assert sk.obs_names == [] - assert sk.mod_names == [] - assert sk.quantity_names == [] - assert sk.metrics == ["n", "bias", "rmse", "corr"] +def test_skill_repr_html(sk_df1): + sk = ms.SkillTable(sk_df1) + repr_html = sk._repr_html_() + assert "obs1" in repr_html -def test_skill_table_2rows(sk_df2): +def test_skill_table_odd_index(sk_df2): + # having a different index name works sk = ms.SkillTable(sk_df2) assert sk.obs_names[0] == "obs1" assert sk.obs_names[1] == "obs2" diff --git a/tests/test_comparer.py b/tests/test_comparer.py index 9b01f9a9b..a6be8a4c1 100644 --- a/tests/test_comparer.py +++ b/tests/test_comparer.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt from modelskill.comparison import Comparer from modelskill import __version__ +import modelskill as ms @pytest.fixture @@ -811,3 +812,57 @@ def test_plots_directional(pt_df): ax = cmp.plot.timeseries() assert ax is not None assert ax.get_ylim() == (0.0, 360.0) + + +def test_from_matched_track_data(): + + df = pd.DataFrame( + { + "lat": [55.0, 55.1], + "lon": [-0.1, 0.01], + "c2": [1.2, 1.3], + "mikeswcal5hm0": [1.22, 1.3], + }, + ) + assert isinstance(df.index, pd.RangeIndex) # Sometimes we don't care about time, only space + + cmp = ms.from_matched( + data=df, obs_item="c2", mod_items="mikeswcal5hm0", x_item="lon", y_item="lat" + ) + gs = cmp.gridded_skill(bins=2) + assert gs.data.sel(x=-0.01, y=55.1, method="nearest").n.values == 1 + + # positional args + cmp2 = ms.from_matched( + data=df, + x_item=0, + y_item=1, + obs_item=2, + mod_items=3, + ) + + assert len(cmp2.data.coords["x"]) == 2 + + +def test_from_matched_dfs0(): + fn = "tests/testdata/matched_track_data.dfs0" + # time: 2017-10-27 10:45:19 - 2017-10-29 13:10:44 (532 non-equidistant records) + # geometry: GeometryUndefined() + # items: + # 0: x (undefined) + # 1: y (undefined) + # 2: HD (undefined) + # 3: Observation (undefined) + + cmp = ms.from_matched( + data=fn, + x_item=0, + y_item=1, + obs_item=3, + mod_items=2, + quantity=ms.Quantity("Water level", "m"), + ) + gs = cmp.gridded_skill() + assert float( + gs.data.sel(x=-0.01, y=55.1, method="nearest").rmse.values + ) == pytest.approx(0.0476569069177831) diff --git a/tests/test_comparercollection.py b/tests/test_comparercollection.py index 544d06a35..dec0b8b32 100644 --- a/tests/test_comparercollection.py +++ b/tests/test_comparercollection.py @@ -569,3 +569,13 @@ def test_peak_ratio_2(cc_pr): sk = cc_pr.skill(metrics=["peak_ratio"]) assert "peak_ratio" in sk.data.columns assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.0799999095653732) + + +def test_copy(cc): + cc2 = cc.copy() + assert cc2.n_models == 3 + assert cc2.n_points == 10 + assert cc2.start_time == pd.Timestamp("2019-01-01") + assert cc2.end_time == pd.Timestamp("2019-01-07") + assert cc2.obs_names == ["fake point obs", "fake track obs"] + assert cc2.mod_names == ["m1", "m2", "m3"] diff --git a/tests/test_pointcompare.py b/tests/test_pointcompare.py index 764c2aecb..24f0398af 100644 --- a/tests/test_pointcompare.py +++ b/tests/test_pointcompare.py @@ -67,17 +67,17 @@ def test_subset_cc_for_named_comparers(cc): ccs = cc[("Klagshamn", "dmi_30357_Drogden_Fyr")] assert len(ccs) == 2 - assert ( - 
repr(ccs) - == "<ComparerCollection>\nComparer: Klagshamn\nComparer: dmi_30357_Drogden_Fyr" - ) + repr_text = repr(ccs) + assert "<ComparerCollection>" in repr_text + assert "Klagshamn" in repr_text + assert "dmi_30357_Drogden_Fyr" in repr_text ccs2 = cc[["dmi_30357_Drogden_Fyr", "Klagshamn"]] + repr_text = repr(ccs2) assert len(ccs2) - assert ( - repr(ccs2) - == "<ComparerCollection>\nComparer: dmi_30357_Drogden_Fyr\nComparer: Klagshamn" - ) + assert "<ComparerCollection>" in repr_text + assert "Klagshamn" in repr_text + assert "dmi_30357_Drogden_Fyr" in repr_text def test_iterate_over_comparers(cc): diff --git a/tests/testdata/matched_track_data.dfs0 b/tests/testdata/matched_track_data.dfs0 new file mode 100644 index 000000000..e1f5fcff6 Binary files /dev/null and b/tests/testdata/matched_track_data.dfs0 differ
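A minimal usage sketch of the new x_item/y_item arguments to from_matched introduced above (it mirrors test_from_matched_track_data; the column names and values are taken from that test and are purely illustrative):

```python
import pandas as pd
import modelskill as ms

# Already matched track data: each row holds a position, an observed
# value and a model value; no time index is required.
df = pd.DataFrame(
    {
        "lon": [-0.1, 0.01],
        "lat": [55.0, 55.1],
        "c2": [1.2, 1.3],              # observation
        "mikeswcal5hm0": [1.22, 1.3],  # model
    }
)

# x_item/y_item point to the coordinate columns, so the resulting
# Comparer is created as a track comparer rather than a point comparer.
cmp = ms.from_matched(
    df, obs_item="c2", mod_items="mikeswcal5hm0", x_item="lon", y_item="lat"
)

# Track comparers can then be binned on a spatial grid.
gs = cmp.gridded_skill(bins=2)
print(gs.data.n)
```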