# Appending to an Icechunk Store with Virtual References
This notebook demonstrates how to append virtual references, created with VirtualiZarr, to an existing Icechunk store.

In [None]:
import os

import fsspec
import icechunk

import xarray as xr
from obstore.store import from_url

from virtualizarr import open_virtual_dataset
from virtualizarr.parsers import HDFParser
from virtualizarr.registry import ObjectStoreRegistry

In [None]:
import warnings

# Suppress every warning whose category is UserWarning.
warnings.simplefilter("ignore", UserWarning)

In [None]:
import virtualizarr

# Report the installed icechunk and virtualizarr versions, one per line.
print(icechunk.__version__, virtualizarr.__version__, sep="\n")

In [None]:
# Load AWS credentials for Pangeo-EOSC storage as environment variables.
from dotenv import load_dotenv

# The outer f-string uses double quotes so the single-quoted key inside the
# replacement field also parses on Python versions older than 3.12, where
# reusing the enclosing quote character is a SyntaxError.
_ = load_dotenv(f"{os.environ['HOME']}/dotenv/school_2025.env")

# JupyterHub user name; used below to build a per-user repository name.
username = os.environ['JUPYTERHUB_USER']

# Define storage
storage_endpoint = 'https://pangeo-eosc-minioapi.vm.fedcloud.eu'
data_bucket = 'rsignell4-protocoast'        # bucket holding the source NetCDF files
storage_bucket = 'protocoast-school-2025'   # bucket holding the Icechunk repository
storage_name = f'{username}-taranto'

In [None]:
# Authenticated (anon=False) S3 filesystem pointed at the storage endpoint.
fs = fsspec.filesystem("s3", endpoint_url=storage_endpoint, anon=False)

# Glob the "nos" NetCDF files across all forecast directories and turn each
# match into a full s3:// URL.
flist = [
    f"s3://{path}"
    for path in fs.glob(f'{data_bucket}/full_dataset/shyfem/taranto/forecast/*/*nos*.nc')
]

In [None]:
# Number of files matched by the glob.
print(len(flist))
# Last entry of the listing; the cells below virtualize only flist[-1:].
print(flist[-1])

### Define our VirtualiZarr `Parser` and `ObjectStoreRegistry`

In [None]:
# Parser instance handed to open_virtual_dataset.
parser = HDFParser()

# Object store for the data bucket, registered under the bucket's s3:// URL.
bucket = "s3://" + data_bucket
store = from_url(bucket, endpoint=storage_endpoint, region="not-used")
registry = ObjectStoreRegistry({bucket: store})

## Create virtual datasets with VirtualiZarr's `open_virtual_dataset`

In [None]:
# Open a virtual dataset for the last listed file only (flist[-1:]);
# "time" is requested as a loadable variable.
ds_list = [
    open_virtual_dataset(url=nc_url, parser=parser, registry=registry, loadable_variables=["time"])
    for nc_url in flist[-1:]
]

In [None]:
# Show which file was just opened as a virtual dataset (the last entry of flist).
print(flist[-1])

In [None]:
def fix_ds(ds):
    """Recast a dataset from a single ``time`` axis onto ``time`` and ``step``.

    The ``time`` variable is renamed to ``valid_time`` and the ``time``
    dimension to ``step``. Two coordinates are then attached:

    * ``step``: ``valid_time`` minus its first element, with
      ``standard_name="forecast_period"``;
    * ``time``: the first element of ``valid_time``, with
      ``standard_name="forecast_reference_time"``.

    Finally the ``valid_time`` index and variable are dropped and the
    resulting dataset is returned.
    """
    renamed = ds.rename_vars(time='valid_time').rename_dims(time='step')

    valid_time = renamed.valid_time
    first_valid = valid_time[0]
    lead = (valid_time - first_valid).assign_attrs({"standard_name": "forecast_period"})
    reference = first_valid.assign_attrs({"standard_name": "forecast_reference_time"})

    with_coords = renamed.assign_coords(step=lead, time=reference)
    return with_coords.drop_indexes("valid_time").drop_vars('valid_time')

In [None]:
# Run fix_ds over every opened dataset.
ds_list = list(map(fix_ds, ds_list))

In [None]:
# Concatenate the "nos" datasets along the "time" dimension.
combined_nos = xr.concat(
    ds_list, dim="time", coords="minimal", compat="override", combine_attrs="override"
)

In [None]:
# Glob the "ous" NetCDF files across all forecast directories and turn each
# match into a full s3:// URL (replaces the earlier "nos" listing in flist).
flist = [
    f"s3://{path}"
    for path in fs.glob(f'{data_bucket}/full_dataset/shyfem/taranto/forecast/*/*ous*.nc')
]

In [None]:
# Open a virtual dataset for the last listed "ous" file only (flist[-1:]);
# "time" is requested as a loadable variable.
ds_list = [
    open_virtual_dataset(url=nc_url, parser=parser, registry=registry, loadable_variables=["time"])
    for nc_url in flist[-1:]
]

In [None]:
# Show which "ous" file was just opened as a virtual dataset (the last entry of flist).
print(flist[-1])

In [None]:
# Run fix_ds over every opened "ous" dataset.
ds_list = list(map(fix_ds, ds_list))

In [None]:
# Concatenate the "ous" datasets along the "time" dimension.
combined_ous = xr.concat(
    ds_list, dim="time", coords="minimal", compat="override", combine_attrs="override"
)

In [None]:
# Merge the "nos" and "ous" datasets into the single dataset that gets appended,
# passing compat='override' to xr.merge.
ds = xr.merge([combined_nos, combined_ous], compat='override')

## Initialize the Icechunk Store
We need to configure the `virtual_chunk_container` and make sure the icechunk container credentials allow for anonymous access.
Details on this can be found [here](https://icechunk.io/en/stable/virtual/).

In [None]:
# Location of the Icechunk repository: the storage bucket on the Pangeo-EOSC
# endpoint, under a prefix built from storage_name.
# NOTE(review): anonymous=True is used here although this storage is later
# opened for a writable session and a commit. Confirm that anonymous writes are
# permitted on this bucket, or whether environment credentials were intended.
storage = icechunk.s3_storage(
    bucket=storage_bucket,
    prefix=f"icechunk/{storage_name}",
    endpoint_url=storage_endpoint,
    region="not-used",  # N/A for Pangeo-EOSC bucket, but required param
    force_path_style=True,
    anonymous=True,
)

In [None]:
# Start from the default repository configuration and register the data bucket
# as a virtual chunk container, keyed by its s3:// URL prefix.
config = icechunk.RepositoryConfig.default()
config.set_virtual_chunk_container(
    icechunk.VirtualChunkContainer(
        url_prefix=f"s3://{data_bucket}/",
        store=icechunk.s3_store(
            endpoint_url=storage_endpoint,
            region="not-used",
            s3_compatible=True,
            force_path_style=True,
            anonymous=True,
        ),
    ),
)


In [None]:
# Non-anonymous credentials for the virtual chunk container of the data bucket.
credentials = icechunk.containers_credentials(
    {f"s3://{data_bucket}/": icechunk.s3_credentials(anonymous=False)}
)

# Open the repository with the configuration above, authorizing access to the
# virtual chunk container.
repo = icechunk.Repository.open(
    storage,
    config,
    authorize_virtual_chunk_access=credentials,
)

In [None]:
# Writable session on the "main" branch; the append is written through its store.
append_session = repo.writable_session("main")

In [None]:
# Write the virtual dataset into the session's store, appending along the "time" dimension.
ds.virtualize.to_icechunk(append_session.store, append_dim="time")

In [None]:
# Commit the writable session with a descriptive message.
append_session.commit("wrote last day of data")

# Check that it worked!
Let's create a read-only Icechunk session and pass in the authorization credentials for the [Virtual Chunk Containers](https://icechunk.io/en/latest/configuration/#virtual-chunk-credentials) to Icechunk.

In [None]:
# Non-anonymous credentials for the virtual chunk container of the data bucket.
credentials = icechunk.containers_credentials(
    {f"s3://{data_bucket}/": icechunk.s3_credentials(anonymous=False)}
)

# Open the repository again and take a read-only session on "main".
read_repo = icechunk.Repository.open(
    storage,
    config,
    authorize_virtual_chunk_access=credentials,
)
read_session = read_repo.readonly_session("main")

In [None]:
# Open the read-only session's store with xarray as Zarr format 3, without
# consolidated metadata. This rebinds `ds` to the dataset read back from the repository.
ds = xr.open_zarr(read_session.store, consolidated=False, zarr_format=3)

In [None]:
# Print the text representation of the dataset read back from the repository.
print(ds)