In [None]:
import geopandas as gpd

from distributed import LocalCluster
from distributed import Client

# Sizing of the local Dask cluster: 8 workers with 4 threads each.
N_WORKERS = 8
THREADS_PER_WORKER = 4

cluster = LocalCluster(
    n_workers=N_WORKERS,
    threads_per_worker=THREADS_PER_WORKER,
)
# Attach this notebook session to the cluster created above.
client = Client(cluster)

In [None]:
# Inputs: the study-area polygon(s) and the global GLAD ARD tile grid.
AOI_PATH = "/data-store/output/southern_rockies.geojson"
ARD_TILES_URL = "https://glad.umd.edu/users/Potapov/ARD/Global_ARD_tiles.zip"

southern_rockies = gpd.read_file(AOI_PATH, driver="GeoJSON")
ard_tiles = gpd.read_file(ARD_TILES_URL)

# Intersect with the GLAD ARD tiles: keep only the parts of the tile grid
# that fall inside the study area.
relevant_ard_tiles = ard_tiles.clip(mask=southern_rockies)

In [None]:
# Display the clipped tiles on a map as a visual sanity check of the intersection.
relevant_ard_tiles.explore()

In [None]:
# Write the 105W_39N tile out as its own GeoJSON file.
is_aoi_tile = relevant_ard_tiles['TILE'] == "105W_39N"
team2_aoi = relevant_ard_tiles[is_aoi_tile]
team2_aoi.to_file("team2_aoi.geojson", driver="GeoJSON")

In [None]:
import pandas as pd

In [None]:
import numpy as np
# Build a table with one row per (tile, year, 16-day interval) and the URL of
# the corresponding GLAD ARD GeoTIFF.
#
# Each year is split into 23 consecutive interval codes.
# NOTE(review): interval codes are assumed to count up from 1 at the first
# interval of 1980, which puts 1997 at 392 (the value that used to be
# hard-coded here) -- confirm against the GLAD ARD manual.
INTERVALS_PER_YEAR = 23
ARD_EPOCH_YEAR = 1980

tiles = ["105W_39N"]
start_year = 1997
end_year = 2023

# Derive the first code from start_year so that changing the year range keeps
# the year <-> code mapping correct instead of silently reusing 392.
n_years = end_year - start_year + 1
first_code = (start_year - ARD_EPOCH_YEAR) * INTERVALS_PER_YEAR + 1
start_codes = first_code + INTERVALS_PER_YEAR * np.arange(n_years)

# year -> list of the 23 interval codes belonging to that year
year_code_maps = {
    year: list(range(start, start + INTERVALS_PER_YEAR))
    for year, start in zip(range(start_year, end_year + 1), start_codes)
}

# URL layout: .../<latitude band>/<tile>/<interval code>.tif, where the
# latitude band is the part of the tile name after the underscore.
pattern = "https://glad.umd.edu/dataset/glad_ard2/{lat}/{tile}/{period}.tif"

rows = []
for tile in tiles:
    lat_band = tile.split("_")[1]
    for year, year_codes in year_code_maps.items():
        for code in year_codes:
            url = pattern.format(lat=lat_band, tile=tile, period=code)
            rows.append((tile, year, code, url))

ard_assets = pd.DataFrame(rows, columns=['tile', 'year', '16-day-code', 'url'])

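Then write the URLs in `ard_assets['url']` to `flist.txt` (one URL per line) and use aria2 to download the files from the server over HTTP into the `data/` directory, e.g.
```
conda install -c conda-forge aria2
aria2c -i flist.txt -j 8 -d data
```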

In [None]:
import rioxarray
import glob
import xarray as xr

In [None]:
def _interval_code(path):
    """Return the integer interval code encoded in a "data/<code>.tif" path."""
    file_name = path.split("/")[1]
    return int(file_name.split(".")[0])


# Downloaded GeoTIFFs, ordered numerically by interval code (not lexically).
files = sorted(glob.glob("data/*.tif"), key=_interval_code)

In [None]:
# Invert year -> [interval codes] into interval code -> year, so the year of
# a downloaded file can be looked up from its code.
reverse_code_to_year = {
    code: year
    for year, codes in year_code_maps.items()
    for code in codes
}

In [None]:
# `datetime` is used below but is not imported in any visible cell; import it
# here so the loop does not fail with NameError on a fresh kernel.
from datetime import datetime

# Open every downloaded interval lazily and stack them along a new "time" axis.
dsets = []
for f in files:
    # File names have the form "data/<interval code>.tif".
    time_code = int(f.split("/")[1].split(".")[0])
    year = reverse_code_to_year[time_code]
    scene = rioxarray.open_rasterio(f, chunks={"x": 1024, "y": 1024})
    # just use year information for now: every interval of a year gets the
    # same timestamp (January 1st of that year)
    scene = scene.expand_dims("time")
    scene = scene.assign_coords(coords={'time': [datetime(year=year, month=1, day=1)]})
    dsets.append(scene)
dset = xr.concat(dsets, dim="time")

# simplest QF flag imaginable: keep only pixels whose band 8 value is 1,
# everything else becomes NaN
filtered = dset.where(dset.sel(band=8) == 1)

# median composition over each year
r = filtered.resample(time="1y").median()

# unify chunks
r = r.chunk({"x": 1024, "y": 1024})

In [None]:
# took 60 mins
# Write the annual median composite `r` to a Zarr store; mode="w" replaces
# any store that already exists at this path.
r.to_zarr("105W_39N_annual_median_composite.zarr", mode="w")

In [None]:
import xarray as xr
from geogif import gif

# Re-open the composite from disk; xr.open_zarr returns a Dataset.
dset = xr.open_zarr("105W_39N_annual_median_composite.zarr")

# Indexing a Dataset with a list looks up *variable names*, so
# `dset[[3, 2, 1]]` raises KeyError. Pull out the stored array first and
# select the bands by their coordinate labels instead.
composite = dset[next(iter(dset.data_vars))]
# NOTE(review): assumes band labels 3, 2, 1 are red, green, blue -- confirm
# against the GLAD ARD band order.
rgb = composite.sel(band=[3, 2, 1])
rgb