In [1]:
import os
# Move the working directory one level up from wherever the kernel started.
# NOTE(review): presumably this is so the `src.*` imports and relative paths resolve
# from the repository root — confirm. The path is relative, so re-running this cell
# without restarting the kernel moves up one more level each time.
os.chdir("../")

In [2]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import pystac_client
import pystac
from requests.adapters import HTTPAdapter
from urllib3 import Retry
from pystac_client.stac_api_io import StacApiIO
import planetary_computer
import warnings
import dask.distributed
import numpy as np
import rioxarray
import pandas as pd
import geopandas as gpd
from src.utils import search_s2_scenes, search_s1_scenes, search_landsat_scenes, search_dem_scene, search_lc_scene 
from src.utils import stack_data, stack_dem_data, stack_lc_data, unique_class, missing_values, gen_chips
import yaml
from dask.distributed import Client, LocalCluster
import logging
from pathlib import Path
import shutil
from functools import reduce
from shapely import box
from src.lc_generation import pystac_itemcollection_to_gdf, process_aoi
from src.lc_generation import process_chips, process_array

In [4]:
# Location of the YAML configuration file that the setup cell loads and copies
# into the run folder.
# NOTE(review): this is an absolute, machine-specific path — adjust it when running elsewhere.
config_path = '/home/benchuser/code/config.yml'

In [5]:
# Keep the cell output readable: ignore Python warnings and only let
# ERROR-level records through from the dask / distributed loggers.
warnings.filterwarnings("ignore")
for _noisy_logger in ("distributed", "dask"):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)

# Load the run configuration. The encoding is given explicitly so the YAML is
# decoded the same way regardless of the platform's default locale.
with open(config_path, "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

# Run identifiers and locations, all taken from the configuration.
# The 'ipynb_test' suffix gives notebook runs their own version folder name.
version = config['dataset']['version']+'ipynb_test'
working_dir = Path(config['working_dir'])
output_dir = Path(config['output_dir'])
metadata_filename = config['metadata']['file']
aoi_version = config['aoi']['version']

# Create the per-version run folder once (no error if it already exists) and
# store a copy of the configuration next to the outputs it produced.
run_dir = working_dir / version
run_dir.mkdir(exist_ok=True)
shutil.copy(config_path, run_dir / "config.yaml")


# Start a local Dask cluster (its own logging limited to ERROR), attach a
# client to it and print the link to the Dask dashboard.
cluster = LocalCluster(silence_logs=logging.ERROR)
client = Client(cluster)
print(client.dashboard_link)

# Retry policy for STAC HTTP requests: up to 10 retries in total, with a
# backoff factor of 1, forced on 502/503/504 responses.
# NOTE(review): allowed_methods=None is understood to mean "retry on any HTTP
# method" — confirm against the urllib3 Retry documentation.
retry = Retry(
    total=10,
    backoff_factor=1,
    status_forcelist=[502, 503, 504],
    allowed_methods=None,
)
stac_api_io = StacApiIO(max_retries=retry)

# Open the Planetary Computer STAC API through the retrying IO layer, passing
# planetary_computer.sign_inplace as the modifier.
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
    stac_io=stac_api_io,
)


# Resume a previous run when its state is on disk, otherwise initialise a fresh one.
# The state files are checked explicitly (instead of relying on a catch-all
# `except`) so that genuine errors — a corrupt file, a missing column, a
# keyboard interrupt — surface instead of silently restarting the run and
# overwriting the saved progress.
aoi_path = working_dir / version / f'{aoi_version}.geojson'
metadata_path = working_dir / version / metadata_filename

if aoi_path.exists() and metadata_path.exists():
    aoi_gdf = gpd.read_file(aoi_path)
    metadata_df = pd.read_csv(metadata_path)
    # Continue numbering after the highest chip id recorded so far; an empty
    # table has no maximum (it would be NaN), so numbering starts at 0.
    global_index = 0 if metadata_df.empty else metadata_df['chip_id'].max() + 1
else:
    # Fresh start: read the source AOI map, flag every AOI as unprocessed,
    # remove the excluded AOIs and persist the result as this run's state file.
    source_aoi_path = f'/home/benchuser/code/data/map_{aoi_version}.geojson'
    aoi_gdf = gpd.read_file(source_aoi_path)
    aoi_gdf['processed'] = False
    # NOTE(review): drop() keeps the original row labels, so labels have gaps
    # during this first run; a state file read back later appears to get a
    # fresh 0..n-1 index — confirm which numbering `aoi_index` should use.
    aoi_gdf = aoi_gdf.drop(config['excluded_aoi_indices'])
    aoi_gdf.to_file(aoi_path, driver = 'GeoJSON')
    metadata_df = pd.DataFrame(columns=["chip_id", "aoi_index", "s2_dates", "s1_dates", "landsat_dates", "lc", "x_center", "y_center", "epsg"])
    global_index = 0


# Process every AOI that is not yet flagged as processed, persisting the flag
# to the state file after each one so an interrupted run can be resumed.
for index, aoi in aoi_gdf.iterrows():
    # `index` is the row *label* yielded by iterrows(), so the flag is read
    # from the row itself. Positional `.iloc[index]` would address the wrong
    # row (or raise IndexError) whenever the labels have gaps, e.g. after the
    # excluded AOIs have been dropped.
    if aoi['processed']:
        print(f'AOI at index {index} already processed, continuing to next...')
        continue
    try:
        global_index, metadata_df = process_aoi(
            index,
            aoi,
            config,
            catalog,
            global_index,
            metadata_df,
            working_dir,
            version,
            metadata_filename
        )
    finally:
        # Runs whether or not process_aoi raised: the AOI is flagged and the
        # state file rewritten, so a rerun will skip it.
        # NOTE(review): this also marks AOIs whose processing failed or was
        # interrupted — confirm that skipping them on rerun is intended.
        aoi_gdf.loc[index, 'processed'] = True
        aoi_gdf.to_file(aoi_path, driver='GeoJSON')


http://127.0.0.1:33627/status
