# Reading Matroos observations

This notebook introduces how to use the `hydropandas` package to read, process and visualise data obtained using the Matroos API.

In [None]:
import contextily as ctx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import hydropandas as hpd
from hydropandas.io.matroos import load_parameter_metadata, select_parameters

# set up the hydropandas colour logger at INFO level so we can see what happens
# in the background; the trailing semicolon keeps the return value out of the
# cell output
hpd.util.get_color_logger("INFO");

In [None]:
# Request a single series from Matroos: one location, one unit (the measured
# quantity) and one source, limited to the window between tmin and tmax.
tmin, tmax = pd.Timestamp("2020-1-1"), pd.Timestamp("2020-1-3")
o1 = hpd.WaterlvlObs.from_matroos(
    location="schoonhoven",
    unit="waterlevel",
    source="observed",
    tmin=tmin,
    tmax=tmax,
)
# last expression of the cell, so the observation object is displayed
o1

In [None]:
# Bounding box around Schoonhoven.
# NOTE(review): presumably [xmin, xmax, ymin, ymax] in RD New (EPSG:28992) — confirm
extent = [100_500, 120_000, 430_000, 457_000]

# Read every observed water level series inside the bounding box into one
# collection, then display it as the cell output.
oc = hpd.read_matroos(
    extent=extent,
    units="waterlevel",
    sources="observed",
    keep_all_obs=False,
)
oc

In [None]:
# show the observations of the collection read above on an interactive map;
# the returned object is the cell output
oc.plots.interactive_map()

## Find selection criteria

You may not know which location, unit and source keywords you have to use. The code below will give you some guidance on how to find them.

In [None]:
# Keyword arguments shared by every query in this cell.
table_kwargs = dict(astype="dataframe", keep_coords=False)

# 1) everything that is available at a single location
selected = select_parameters(locations="schoonhoven", **table_kwargs)
print("All units and sources available for Schoonhoven:")
display(selected)
print("\n")

# 2) fix the unit to waterlevel and query several locations at once
selected = select_parameters(
    locations=["schoonhoven", "Nieuwpoort"], units="waterlevel", **table_kwargs
)
print("Units and source for Schoonhoven and Nieuwpoort:")
display(selected)
print("\n")

# 3) fix both unit and source and list every location that matches
selected = select_parameters(units="waterlevel", sources="observed", **table_kwargs)
print("Locations for observed waterlevels:")
display(selected)

In [None]:
# The metadata is used below as a mapping: location -> {"units": {unit: sources}}.
params_dic = load_parameter_metadata()

# locations are the top-level keys
all_locations = [*params_dic]
print("all locations:")
print(all_locations, "\n")

# units: collect the keys of every location's "units" mapping, then deduplicate
unit_names = []
for loc_meta in params_dic.values():
    unit_names.extend(loc_meta["units"].keys())
all_units = np.unique(unit_names)
print("all units:")
print(all_units, "\n")

# sources: flatten the values stored under every unit of every location,
# then deduplicate
source_names = []
for loc_meta in params_dic.values():
    for unit_sources in loc_meta["units"].values():
        source_names.extend(unit_sources)
all_sources = np.unique(source_names)
print("all sources:")
print(all_sources, "\n")

In [None]:
# Map the locations that have waterlevel measurements, distinguishing the
# ones available from the "observed" source from all the others.
units = "waterlevel"
sources = "observed"

# query the locations twice: once for the chosen source, once with sources=None
selected = select_parameters(units=units, sources=sources, astype="geodataframe")
selected2 = select_parameters(units=units, sources=None, astype="geodataframe")

# keep the rows of the second result whose index does not occur in the first
in_selected = selected2.index.isin(selected.index)
selected_other_sources = selected2.loc[~in_selected]

# draw both groups on the same axes and add a background map
f, ax = plt.subplots(figsize=(10, 10))
selected_other_sources.plot(
    ax=ax,
    color="orange",
    marker="x",
    label="other sources",
)
selected.plot(ax=ax, label=f"source={sources}")
ax.set_title(f"locations with {units} measurements")
ax.legend()
# crs=28992 tells contextily which coordinate system the axes are in
ctx.add_basemap(ax=ax, crs=28992, alpha=0.5)

##