## packages

In [1]:
# general python
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import numpy as np
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
# from rich import print

In [2]:
# general eWC
import ewatercycle
import ewatercycle.forcing
import ewatercycle.models
from ewatercycle.util import get_time

In [3]:
from shutil import copytree
from unittest import mock

import pytest
import xarray as xr

In [4]:
path = Path.cwd()
forcing_path = path / "Forcing"

In [5]:
camels_path = forcing_path  / 'Camels'

In [6]:
datasets = list(camels_path.glob("camels_03439000*.nc"))

### Funtions to test

In [7]:
from ewatercycle._forcings.caravan import (
    CaravanForcing,
    get_shapefiles,
    extract_basin_shapefile,
    crop_ds,
)

ModuleNotFoundError: No module named 'ewatercycle._forcings.caravan'

In [None]:
@pytest.fixture
def mock_retrieve():
    with mock.patch("xarray.Dataset") as mock_class:
        mock_class.return_value = xr.open_dataset(forcing_path / "test_caravan_file.nc")
        yield mock_class

In [8]:
xr.open_dataset(datasets[0]) 

IndexError: list index out of range

In [17]:
COMMON_URL = "ca13056c-c347-4a27-b320-930c2a4dd207"
OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/1/"
SHAPEFILE_URL = (
    f"https://data.4tu.nl/file/{COMMON_URL}/bbe94526-cf1a-4b96-8155-244f20094719"
)
basin_id = "camels_03439000"
dataset = basin_id.split("_")[0]

In [41]:
run = False
if run:
    ds = xr.open_dataset(f"{OPENDAP_URL}{dataset}.nc")
    start_time = '1981-01-01T00:00:00.000000000Z'
    end_time = '1982-01-01T00:00:00.000000000Z'
    ds_basin = ds.sel(basin_id=["camels_01022500".encode(),"camels_03439000".encode()])
    ds_basin_time = crop_ds(ds_basin, start_time, end_time)
    ds_basin_time.to_netcdf(forcing_path / "test_caravan_file.nc")
else: 
    ds_basin_time = xr.open_dataset(forcing_path / "test_caravan_file.nc")

In [26]:
def crop_ds(ds: xr.Dataset, start_time: str, end_time: str) -> xr.Dataset:
    """Crops dataset based on time."""
    get_time(start_time), get_time(end_time)  # if utc, remove Z to parse to np.dt64
    start, end = np.datetime64(start_time[:-1]), np.datetime64(end_time[:-1])
    return ds.isel(
        time=(ds["time"].to_numpy() >= start) & (ds["time"].to_numpy() <= end)
    )


In [42]:
ds_basin_time

In [None]:
gdf_all = gpd.read_file(caravan_forcing.directory / 'shapefiles' / 'combined.shp')

In [44]:
gdf_all.iloc[:2].to_file(forcing_path/ "Camels" / "test_extract_basin_shapefile_data.shp")

In [56]:
from cartopy.io import shapereader
import fiona

In [57]:
def extract_basin_shapefile(
    basin_id: str,
    combined_shapefile_path: Path,
    shape_path: Path,
) -> None:
    """Extract single polygon from multipolygon shapefile."""
    shape_obj = shapereader.Reader(combined_shapefile_path)
    list_records = []
    for record in shape_obj.records():
        list_records.append(record.attributes["gauge_id"])

    df = pd.DataFrame(
        data=list_records, index=range(len(list_records)), columns=["basin_id"]
    )
    basin_index = df[df["basin_id"] == basin_id].index.array[0]

    with fiona.open(combined_shapefile_path) as src:
        dst_schema = src.schema  # Copy the source schema
        # Create a sink for processed features with the same format and
        # coordinate reference system as the source.
        with fiona.open(
            shape_path,
            mode="w",
            layer=basin_id,
            crs=src.crs,
            driver="ESRI Shapefile",
            schema=dst_schema,
        ) as dst:
            for i, feat in enumerate(src):
                # kind of clunky but it works: select filtered polygon
                if i == basin_index:
                    geom = feat.geometry
                    assert geom.type == "Polygon"

                    # Add the signed area of the polygon and a timestamp
                    # to the feature properties map.
                    props = fiona.Properties.from_dict(
                        **feat.properties,
                    )

                    dst.write(fiona.Feature(geometry=geom, properties=props))

In [73]:
basin_id = "camels_01022500"
test_files_dir = forcing_path/ "Camels" / "test_extract_basin_shapefile_data.shp"
tmp_camels_dir = forcing_path / "Camels" / f"{basin_id}.shp"
extract_basin_shapefile(basin_id, test_files_dir, tmp_camels_dir)

In [74]:
shape_obj = shapereader.Reader(tmp_camels_dir)

In [85]:
rec = [rec for rec in shape_obj.records()]

In [87]:
len(rec)

1

In [88]:
rec[0].attributes['gauge_id'] == basin_id

True