# Download SWOT Pixel Cloud products from hydroweb.next and extract the information of interest for your study into a Zarr (zcollection) database for future use


## Setting the spatial zone and the time period of interest
We use a GeoPackage layer, created beforehand with e.g. QGIS, to limit both the data download and the database to the area of interest.

In [1]:
from pixcdust.downloaders.hydroweb_next import PixCDownloader
import geopandas as gpd
from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Area of interest: polygon layer loaded from a GeoPackage file.
gdf_geom = gpd.read_file('/home/hysope2/STUDIES/SWOT_Panama/DATA/aoi.gpkg')

# Time period of interest, given as a pair of datetimes.
dates = (datetime(2023, 4, 6), datetime(2023, 4, 8))



## Let's download matching data.
This will unfortunately lead to downloading many big files (that will be removed later). This is the only way right now, but the hydroweb.next team is working on improving that.

In [3]:
# Build a downloader restricted to the area of interest and time period
# defined above; downloaded files are looked up under path_download later on.
# NOTE(review): the recorded run of this cell raised
# "ValueError: Did not find collection_name in list of available collections"
# -- the collection to query may need to be specified; confirm against the
# PixCDownloader API.
pixcdownloader = PixCDownloader(
    gdf_geom,
    dates,
    path_download='/tmp/pixc',
    verbose=0,
)
pixcdownloader.search_download()

ValueError: Did not find collection_name in list of available collections in hydroweb_next.
Available collections are: ['FLOODDAM_CERFACS_FLOOD_EXTENT', 'FLOODDAM_CERFACS_VELOCITY', 'FLOODDAM_CERFACS_WATER_DEPTH_RASTER', 'FLOODDAM_CERFACS_WATER_DEPTH_VECTOR', 'FLOODDAM_FLOODML_FLOOD_MAPPING', 'FLOODDAM_QUANTCUBE', 'GRS_L2A_S2', 'HYDROWEB_LAKES_OPE', 'HYDROWEB_LAKES_RESEARCH', 'HYDROWEB_RIVERS_OPE', 'HYDROWEB_RIVERS_RESEARCH', 'LIS_FSC_PREOP', 'LIS_SNT_YEARLY', 'SWOT_L1B_HR_SLC', 'SWOT_L1B_HR_SLC_SAMPLE_V1_2', 'SWOT_L2_HR_LAKEAVG', 'SWOT_L2_HR_LAKESP_OBS', 'SWOT_L2_HR_LAKESP_OBS_SAMPLE_V1_2', 'SWOT_L2_HR_LAKESP_PRIOR', 'SWOT_L2_HR_LAKESP_PRIOR_SAMPLE_V1_2', 'SWOT_L2_HR_LAKESP_UNASSIGNED', 'SWOT_L2_HR_LAKESP_UNASSIGNED_SAMPLE_V1_2', 'SWOT_L2_HR_PIXC', 'SWOT_L2_HR_PIXCVEC', 'SWOT_L2_HR_PIXCVEC_SAMPLE_V1_2', 'SWOT_L2_HR_PIXC_SAMPLE_V1_2', 'SWOT_L2_HR_RASTER_100M', 'SWOT_L2_HR_RASTER_100M_SAMPLE_V1_2', 'SWOT_L2_HR_RASTER_250M', 'SWOT_L2_HR_RASTER_250M_SAMPLE_V1_2', 'SWOT_L2_HR_RIVERAVG', 'SWOT_L2_HR_RIVERSP_NODE', 'SWOT_L2_HR_RIVERSP_NODE_SAMPLE_V1_2', 'SWOT_L2_HR_RIVERSP_REACH', 'SWOT_L2_HR_RIVERSP_REACH_SAMPLE_V1_2', 'SWOT_PRIOR_LAKE_DATABASE', 'SWOT_PRIOR_RIVER_DATABASE', 'SW_L2_S2_SINGLE', 'SW_L3_S2_MONTHLY', 'SW_L3_S2_YEARLY', 'WQ_L2B_S2']

## Now we have all necessary files, let us extract key variables within area of interest in a Zarr (zcollection) database.
This partitioned Zarr format is very efficient for time analysis, but is not currently accessible in GIS software such as QGIS.
We are using the same geodataframe to limit the data to the area of interest

In [None]:
from pixcdust.converters.zarr import PixCNc2ZarrConverter
from glob import glob

In [None]:
# NB: the input files must be given in strictly ascending or descending time
# order, hence the sorted() around glob().
nc_files = sorted(glob(pixcdownloader.path_download+'/*/*nc'))

# Build the Zarr database from the downloaded files, keeping only the listed
# variables and using the same geodataframe as area of interest.
pixc = PixCNc2ZarrConverter(
    nc_files,
    "/tmp/my_awesome_pixc_zarr",
    variables=['height', 'sig0', 'classification'],
    area_of_interest=gdf_geom,
    mode='o',
)
pixc.database_from_nc()

The database has been successfully created; we can now remove the raw files.

In [None]:
import shutil
# Recursively delete the raw download directory. This is the same path that was
# passed as path_download to PixCDownloader above, so keep the two in sync.
shutil.rmtree('/tmp/pixc')

# Let us use our Zarr Database
The previous steps do not need to be re-run once the database exists.

Now we can open this database as an xarray object, a DataFrame, or a GeoDataFrame.

In [None]:
from pixcdust.readers.zarr import PixCZarrReader
# Import the class, exactly as the first cell does. A bare `import datetime`
# would rebind the notebook-wide name `datetime` to the module, so re-running
# the cell that builds `dates` afterwards would fail with
# "TypeError: 'module' object is not callable".
from datetime import datetime

# Open the Zarr database written by the converter.
pixc_read = PixCZarrReader(
    "/tmp/my_awesome_pixc_zarr"
)
# Load the data for the requested time window.
# NOTE(review): this window (10-12 April 2023) does not overlap the
# 6-8 April 2023 period used for the download -- confirm it is intended.
pixc_read.read((datetime(2023,4,10), datetime(2023,4,12)))
# Last expression of the cell: the notebook displays the loaded data.
pixc_read.data

In [None]:
# Convert the data held by the reader into a GeoDataFrame (per the method name).
gdf_pixc = pixc_read.to_geodataframe()
# Last expression of the cell: the notebook displays the resulting table.
gdf_pixc

Enjoy!