# Reading water connect observations

This notebook introduces how to use the `hydropandas` package to read, process and visualise data from the South Australia Water Connect database.

In [None]:
import contextily as cx
import matplotlib.pyplot as plt
import numpy as np

import hydropandas as hpd
from hydropandas.io.water_connect import get_locations_gdf, get_locations_within_extent

# enabling logging so we can see what happens in the background:
# hydropandas' get_color_logger is called with the "INFO" level, and the
# trailing ";" keeps the call's return value from being echoed as cell output
hpd.util.get_color_logger("INFO");

In [None]:
# area of interest, given as (left, right, lower, upper) bounds in lat/lon
extent = (140.86, 140.9, -38.05, -38.00)

# read the water connect observations within that extent (tmin="2000-1-1" is
# passed along; presumably the start of the requested period -- confirm)
oc = hpd.read_waterconnect(extent=extent, tmin="2000-1-1")
oc

In [None]:
# turn the collection into a GeoDataFrame (crs 4326) and draw the locations
locations_gdf = oc.to_gdf(crs=4326)
ax = locations_gdf.plot(figsize=(10, 10))

# put a basemap underneath, using the same crs as the plotted points
cx.add_basemap(ax=ax, crs=4326)

# label every location with its index value, centred on its (x, y) position
for name, x, y in zip(oc.index, oc["x"], oc["y"]):
    ax.annotate(text=name, xy=(x, y), horizontalalignment="center")

In [None]:
# take one observation out of the collection and plot its "rswl" column,
# labelled with the observation's name and with its unit on the y-axis
o = oc.get_obs(105147)
o["rswl"].plot(
    label=o.name,
    ylabel=o.unit,
    marker=".",
    legend=True,
)

In [None]:
# get data from a certain measurement well based on the drillhole number (dh_no);
# the result is kept as `o1` and shown as the cell output
o1 = hpd.GroundwaterObs.from_waterconnect(95360)
o1

In [None]:
# fetch a second drillhole so it can be compared with o1
o2 = hpd.GroundwaterObs.from_waterconnect(119988)

# each observation gets one colour, shared by its series and its ground level line
coloured_obs = [(o1, "b"), (o2, "g")]

f, ax = plt.subplots(figsize=(10, 4))

# first the "rswl" series of both observations ...
for obs, colour in coloured_obs:
    obs["rswl"].plot(
        ax=ax, marker=".", color=colour, label=obs.name, ylabel=obs.unit
    )

# ... then a dotted horizontal line at the ground level of each of them
for obs, colour in coloured_obs:
    ax.axhline(
        obs.ground_level, ls=":", color=colour, label=f"ground level {obs.name}"
    )

ax.legend()

## Find selection criteria

Often you do not know the exact drillhole numbers (dh_no) of the measurements you want to download, and it may be infeasible to download all the observations in the extent. To get the data you want, you can follow these steps:
1. get a geodataframe with the metadata of all the locations in the extent
2. query the geodataframe by any metadata property such as REF_ELEV, STATUS, MAX_DEPTH
3. request the measurement for the locations you obtained at step 2

In [None]:
# step 1: metadata of all the locations in the extent
# the extent is given as (left, right, lower, upper) bounds in lat/lon
extent = (140.86, 140.9, -38.05, -38.00)

# first get all locations, then keep the ones within the extent
gdf = get_locations_gdf()
gdf_extent = get_locations_within_extent(gdf, extent)
gdf_extent

In [None]:
# step 2: query the GeoDataFrame

# -9999 is replaced by NaN in the numeric columns below so that it does not
# distort the statistics and the selection (presumably the database's no-data
# marker -- confirm)
NO_DATA = -9999

# Build a cleaned frame and rebind the name instead of assigning into
# `gdf_extent` through .loc: the frame comes from filtering `gdf`, so an
# in-place write may hit a slice of it (SettingWithCopyWarning, or silently
# altering `gdf`). `assign` returns a new frame, and re-running this cell
# gives the same result.
gdf_extent = gdf_extent.assign(
    REF_ELEV=gdf_extent["REF_ELEV"].replace(NO_DATA, np.nan),
    MAX_DEPTH=gdf_extent["MAX_DEPTH"].replace(NO_DATA, np.nan),
)

# print statistics of the reference elevation
print(f"statistics of the reference elevation:\n{gdf_extent['REF_ELEV'].describe()}\n")

# print the unique values of the status description column
print(f"unique value in status column:\n {gdf_extent['STAT_DESC'].unique()}\n")

# print statistics of the maximum depth
print(f"statistics of the max depth:\n{gdf_extent['MAX_DEPTH'].describe()}\n")

# select all observation points that satisfy both conditions; rows where either
# value is NaN compare as False and are therefore left out
gdf_selection = gdf_extent.loc[
    (gdf_extent["REF_ELEV"] > 12.5) & (gdf_extent["MAX_DEPTH"] > 15)
]
print("selected data:\n")
display(gdf_selection)

In [None]:
# step 3: read data for the selection criteria
# the selected locations are passed as `location_gdf`; note that this rebinds
# `oc`, replacing the collection that was read by extent earlier
oc = hpd.read_waterconnect(location_gdf=gdf_selection)
oc

In [None]:
# plot the data on an interactive map
# the y/x columns are first copied to "lat"/"lon" columns on the collection;
# presumably interactive_map looks for those column names -- confirm
oc["lat"] = oc["y"]
oc["lon"] = oc["x"]
oc.plots.interactive_map()