# Exploring "HLSS30" using tensorlakehouse

### Data description
* Collection/dataset: "HLSS30"
* Data source: GeoTIFF files stored on Cloud Object Store (COS)

### Functionalities
* tensorlakehouse (openEO) functionalities:
    - describe collection
    - load collection

### Setup

1. Create a Python virtualenv (Python 3.11.7 is the recommended version).
2. Clone the repository:
```
git clone https://github.com/IBM/tensorlakehouse-openeo-driver.git
``` 

3. Go to the tutorials directory:
```
cd tensorlakehouse-openeo-driver/tutorials
```

4. Install dependencies:
```
pip install -r tutorial_requirements.txt
```

5. Run Jupyter Notebook or JupyterLab:
```
jupyter lab .
```


In [3]:
import rioxarray as rxr
import os
os.environ['USE_PYGEOS'] = '0'
import matplotlib.pyplot as plt
from pathlib import Path
# openeo is a client-side implementation that allows users to query OpenEO service
import openeo
# pystac_client is a client-side implementation that allows users to query STAC service
from pystac_client import Client
# os and dotenv are used to read the OpenEO credentials from the environment
import os
from dotenv import load_dotenv
import pandas as pd
import xarray as xr


Set the URL and credentials of the openEO service

In [4]:
# The backend accepts a fixed example username/password for testing; this
# notebook does not embed them but reads them from the environment, after
# load_dotenv() has had a chance to populate it.
load_dotenv()
OPENEO_URL = "https://tensorlakehouse-openeo-driver-geospatial-be-staging.apps.fmaas-backend.fmaas.res.ibm.com"
# a missing variable raises KeyError here, before any connection is attempted
username = os.environ["OPENEO_USERNAME"]
password = os.environ["OPENEO_PASSWORD"]

# open the connection first, then authenticate it using basic auth
openeo_conn = openeo.connect(url=OPENEO_URL)
openeo_conn = openeo_conn.authenticate_basic(username=username, password=password)


KeyboardInterrupt: 

In [None]:
# Query parameters shared by the cells below: time interval, bounding box,
# collection id, and the local directory that receives downloaded files.

# set time interval (ISO 8601 timestamps)
start = '2020-07-01T19:13:57Z'
end = '2020-07-01T20:13:57Z'
time_range = f"{start}/{end}"

# set bounding box
west = -123.0
east = -122.9
south = 38.0
north = 38.1


collection_id = "HLSS30"
spatial_extent = {
    'west': west,
    'south': south,
    'east': east,
    'north': north,
}

temporal_extent = [start, end]
# create data dir to save files; exist_ok makes re-running this cell a no-op
# and avoids the check-then-create race of testing exists() first
data_dir = Path("test_data")
data_dir.mkdir(exist_ok=True)

In [None]:
# fetch the metadata of the collection; as the last expression of the cell,
# the result is displayed by the notebook
openeo_conn.describe_collection(collection_id=collection_id)

In [None]:
# request a single band of the collection
bands = ["B02"]
print(f"{collection_id=} {spatial_extent=} {temporal_extent=} {bands=}")

# keep only items whose "cloud_coverage" property is at most 70
cloud_filter = openeo.collection_property("cloud_coverage") <= 70

# build the whole pipeline as one chain: load -> reproject -> daily aggregation
cube = (
    openeo_conn.load_collection(
        collection_id=collection_id,
        spatial_extent=spatial_extent,
        temporal_extent=temporal_extent,
        bands=bands,
        properties=[cloud_filter],
    )
    # reproject cube to EPSG:4326 (note: resolution=0 means that no regridding is done)
    .resample_spatial(projection=4326, resolution=0)
    # daily aggregation using minimum value
    .aggregate_temporal_period(period="day", reducer="min")
)

In [None]:
import json

# append a save_result step that requests netCDF output
output_format = "netCDF"
merged_cube = cube.save_result(output_format)

# NOTE(review): the graph printed here is that of `cube`, not `merged_cube`,
# and the download cell also calls `cube.download(...)` -- `merged_cube` is
# never used afterwards; confirm whether that is intended.
graph_json = cube.to_json()
print(json.loads(graph_json))

In [None]:
# Download the result of `cube` into data_dir and report how long it took.
filename = f'{collection_id}_result.nc'
path = data_dir / filename

# delete file if it already exists (missing_ok avoids a separate exists() check)
path.unlink(missing_ok=True)

# measure elapsed time; dedicated names are used so the `start`/`end` strings
# that define the query time interval are not overwritten with timestamps
download_start = pd.Timestamp.now()
print(f"Start the download at {download_start}")
# download file
cube.download(path)

download_end = pd.Timestamp.now()
elapsed_time = download_end - download_start
print(f"{download_end.isoformat()} Downloaded file: {path} elapsed time={elapsed_time}")

In [None]:
# open the downloaded file as an xarray Dataset
ds = xr.open_dataset(filename_or_obj=path)

In [None]:
# show the values of the "time" coordinate of the downloaded dataset
ds["time"].values

In [None]:
# select the variable of the first requested band and plot its first time step

da = ds[bands[0]]
da.isel(time=0).plot()