## Icechunk Version Control and Branching
A showcase of adding new data to an Icechunk store over time, "time traveling" back to earlier snapshots, and creating new branches.

Requires `.env` with `data` account credentials

In [None]:
from pathlib import Path

from icefabric.helpers import load_creds

# `dir` is the directory where the .env file is located.
# Path.cwd().parents[1] resolves to two levels above the current working directory,
# so this assumes the notebook is launched from its own folder - TODO confirm against the repo layout.
load_creds(dir=Path.cwd().parents[1])

In [None]:
import warnings

from icefabric.builds import IcechunkRepo, S3Path
from icefabric.helpers import virtualize_and_concat_archival_files_on_time
from icefabric.schemas import FileType, NGWPCLocations

# Suppress all Python warnings for the remainder of the notebook session
warnings.filterwarnings("ignore")

# S3 location (bucket + key prefix) of the test repo used throughout this walkthrough
new_repo_s3_path = S3Path(bucket="hydrofabric-data", prefix="ic_testing/snodas_yearly_append_test")
# Repo handle that every later cell operates on.
# NOTE(review): presumably this creates the repo when none exists at the location - confirm.
new_repo = IcechunkRepo(location=new_repo_s3_path)

In [None]:
# Print the repo ancestry before this notebook has written any data to it
new_repo.print_history()

In [None]:
# Gather the first five SNODAS NetCDF files from 2009 and virtualize/combine them
# into a single dataset
snodas_09_vds = virtualize_and_concat_archival_files_on_time(
    location=NGWPCLocations.SNODAS_REF.path,
    file_type=FileType.NETCDF,
    file_date_pattern="zz_ssmv11034tS__T0001TTNATS*05HP001.nc",
    manual_file_pattern="zz_ssmv11034tS__T0001TTNATS2009*.nc",
    testing_file_quantity=5,
    loadable_vars=["crs"],
)

# Write the 2009 data to the SNODAS repo, which records a new snapshot
new_repo.write_dataset(
    ds=snodas_09_vds,
    virtualized=True,
    commit="First commit! 09 data added.",
)

In [None]:
# The 2009 write produced a new snapshot, so print the repo ancestry again to see it
new_repo.print_history()

In [None]:
# Retrieve the dataset now contained within the SNODAS repo and print it
snodas_data = new_repo.retrieve_dataset()
print(snodas_data)

In [None]:
# Repeat the 2009 collection/virtualization step, this time for the 2010 files
snodas_10_vds = virtualize_and_concat_archival_files_on_time(
    location=NGWPCLocations.SNODAS_REF.path,
    file_type=FileType.NETCDF,
    file_date_pattern="zz_ssmv11034tS__T0001TTNATS*05HP001.nc",
    manual_file_pattern="zz_ssmv11034tS__T0001TTNATS2010*.nc",
    testing_file_quantity=5,
    loadable_vars=["crs"],
)

# Append the 2010 data along the "time" dimension, recording a new snapshot
new_repo.append_virt_data_to_store(
    vds=snodas_10_vds,
    append_dim="time",
    commit="Appended new data from the year 2010",
)

In [None]:
# The 2010 append produced another snapshot, so print the repo ancestry once more
new_repo.print_history()

In [None]:
# Retrieve the repo's dataset again - it now holds both the 2009 and 2010 data - and print it
snodas_data = new_repo.retrieve_dataset()
print(snodas_data)

In [None]:
# "Time travel": retrieve and print the data from the previous snapshot,
# i.e. the repo contents before the 2010 data was appended
prev_snap_snodas_data = new_repo.retrieve_prev_snapshot()
print(prev_snap_snodas_data)

In [None]:
# Create a feature branch based on "main" that will receive the 2011 data
new_repo.create_new_branch(name="2011_feature")

# Same collection/virtualization step used for the 2009 and 2010 files, now for 2011
snodas_11_vds = virtualize_and_concat_archival_files_on_time(
    location=NGWPCLocations.SNODAS_REF.path,
    file_type=FileType.NETCDF,
    file_date_pattern="zz_ssmv11034tS__T0001TTNATS*05HP001.nc",
    manual_file_pattern="zz_ssmv11034tS__T0001TTNATS2011*.nc",
    testing_file_quantity=5,
    loadable_vars=["crs"],
)

# Append the 2011 data along "time" as a new snapshot on the feature branch
new_repo.append_virt_data_to_store(
    vds=snodas_11_vds,
    append_dim="time",
    commit="Appended new data from the year 2011",
    branch="2011_feature",
)

In [None]:
# With the 2011 data committed on the new branch, print the history of both branches
# so the two ancestries can be compared
print("NEW BRANCH =====================================")
new_repo.print_history(branch="2011_feature")
print("MAIN BRANCH ====================================")
new_repo.print_history(branch="main")

In [None]:
# Print both branches' datasets - notice the new one has 2011 data.
# Each branch is read explicitly in this cell instead of reusing a `snodas_data`
# value left over from an earlier cell, so the comparison shows the current state
# of both branches even when cells are re-run or executed out of order.
snodas_data_feat_branch = new_repo.retrieve_dataset(branch="2011_feature")
snodas_data = new_repo.retrieve_dataset(branch="main")
print("NEW BRANCH ========================================================")
print(snodas_data_feat_branch)
print("===================================================================")
print("MAIN BRANCH =======================================================")
print(snodas_data)
print("===================================================================")

In [None]:
# Cleanup - delete the test repo entirely so the walkthrough can be re-run from scratch.
# NOTE(review): `quiet=True` presumably suppresses confirmation/log output - confirm.
new_repo.delete_repo(quiet=True)