In [8]:
import fsspec
from virtualizarr import open_virtual_dataset
import xarray as xr
import tqdm


In [2]:
# Anonymous (unsigned) S3 filesystem handle used for the bucket listings below.
fs = fsspec.filesystem(protocol='s3', anon=True)

In [3]:
# List the top-level entries of the 'noaa-himawari9' bucket (names only).
himawari_bucket_files = fs.ls('noaa-himawari9', detail=False)
himawari_bucket_files

['noaa-himawari9/AHI-L1b-FLDK',
 'noaa-himawari9/AHI-L1b-Japan',
 'noaa-himawari9/AHI-L1b-Target',
 'noaa-himawari9/AHI-L2-FLDK-Clouds',
 'noaa-himawari9/AHI-L2-FLDK-ISatSS',
 'noaa-himawari9/AHI-L2-FLDK-Winds',
 'noaa-himawari9/index.html']

In [4]:
# Peek at the time-of-day (HHmm) sub-folders available for 2025/03/11.
fs.ls('s3://noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/', detail=False)

['noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0000',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0010',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0020',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0030',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0040',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0050',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0100',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0110',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0120',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0130',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0140',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0150',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0200',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0210',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0220',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0230',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0250',
 'noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0300',
 'noaa-himawari9/AHI-L2-FLDK

In [5]:
# Date and time slot that select which S3 prefix is searched for files.
YYYY, MM, DD = '2025', '03', '11'
HHmm = '0000'  # a falsy value (e.g. None) selects every time slot of the day
# HHmm = None

In [10]:
# if HHmm have not null
# Build the search pattern: a single time slot when HHmm is set, otherwise
# every time slot of the selected day.
time_slot = HHmm if HHmm else '*'
himawari_pattern = f's3://noaa-himawari9/AHI-L2-FLDK-Clouds/{YYYY}/{MM}/{DD}/{time_slot}/*.nc'
himawari_files = fs.glob(himawari_pattern)

# The matches are prefixed with 's3://' so they can be passed on as full URLs.
himawari_sorted_files = sorted(['s3://' + f for f in himawari_files])

# Fail early with a readable message instead of an IndexError on the
# [0] / [-1] lookups in the summary below.
if not himawari_sorted_files:
    raise FileNotFoundError(f'No NetCDF files matched {himawari_pattern}')

print(f'Dataset info:\n  Total number: {len(himawari_sorted_files)}\n  First file: {himawari_sorted_files[0]}\n  Last file: {himawari_sorted_files[-1]}')

Dataset info:
  Total number: 3
  First file: s3://noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0000/AHI-CHGT_v1r1_h09_s202503110000207_e202503110009401_c202503110015101.nc
  Last file: s3://noaa-himawari9/AHI-L2-FLDK-Clouds/2025/03/11/0000/AHI-CPHS_v1r1_h09_s202503110000207_e202503110009401_c202503110015101.nc


In [25]:
# Open the first matched file directly from S3 with the h5netcdf backend.
ds = xr.open_dataset(
    himawari_sorted_files[0],
    engine='h5netcdf',
    storage_options=dict(
        anon=True,
        default_fill_cache=False,        # reduce memory usage
        default_cache_type='readahead',  # use read-ahead caching
    ),
)
ds


In [None]:
# Open every matched file and combine them into a single dataset.
ds_multi = xr.open_mfdataset(
    himawari_sorted_files,  # list of s3:// URLs
    engine='h5netcdf',
    combine='by_coords',    # combine using coordinate values
    parallel=True,          # open the files in parallel
    storage_options={
        'anon': True,
        'default_fill_cache': False,
        'default_cache_type': 'readahead',
    },
    # Keys of a `chunks` dict are dimension names, so {'auto': True} never
    # requested automatic chunking; the string 'auto' does.
    chunks='auto',
)


In [21]:
# Attempt to open the first matched file through an 'icechunk' engine.
# NOTE(review): 'icechunk' does not look like a registered xarray backend
# engine, and the path is a .nc file rather than an Icechunk store — confirm
# this cell actually runs. It also rebinds `ds`, replacing any earlier value.
ds = xr.open_dataset(
    himawari_sorted_files[0],
    engine='icechunk',
    storage_options={
        'anon': True,
        'default_fill_cache': False,  # reduce memory usage
        'default_cache_type': 'readahead',  # use read-ahead caching
    }
)
ds


In [24]:
# Plain-text summary of whichever dataset `ds` was bound to most recently.
print(ds)

<xarray.Dataset> Size: 4GB
Dimensions:                     (Rows: 5500, Columns: 5500, CldHgtFlgCnst: 5,
                                 y: 5500, x: 5500)
Coordinates:
    Latitude                    (Rows, Columns) float32 121MB ...
    Longitude                   (Rows, Columns) float32 121MB ...
Dimensions without coordinates: Rows, Columns, CldHgtFlgCnst, y, x
Data variables: (12/48)
    StartRow                    int32 4B ...
    StartColumn                 int32 4B ...
    AchaMode                    float64 8B ...
    NumOfQualityFlag            float32 4B ...
    Shadow_Mask                 (Rows, Columns) float32 121MB ...
    Latitude_Pc                 (Rows, Columns) float32 121MB ...
    ...                          ...
    granule_level_quality_flag  float64 8B ...
    CldOptDpthAWIPS             (y, x) float32 121MB ...
    CldTopHghtAWIPS             (y, x) float32 121MB ...
    CldTopTempAWIPS             (y, x) float32 121MB ...
    CldTopPresAWIPS             (y, x

In [None]:
# TODO(review): disabled draft that concatenates datasets along a new 'time'
# dimension. `himawari_dataset` is not defined in the cells shown here, so it
# must be built before this can be re-enabled.
# virtual_ds = xr.concat(
#     himawari_dataset,
#     dim='time',
#     coords='minimal', 
#     compat='override',
#     combine_attrs='override'
# )

# virtual_ds